From 9f3a633d8d6afb06045d447c2f828345cd0aa9ba Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Fri, 25 Aug 2017 15:23:07 -0700 Subject: [PATCH 001/294] Add value order and query methods to HloOrdering. These value ordering methods (for now) still exist in HloDataflowAnalysis but will be removed in a followup CL. Also, move call graph traversal code used in ordering queries from hlo_ordering to call_graph. PiperOrigin-RevId: 166533035 --- tensorflow/compiler/xla/service/BUILD | 2 + tensorflow/compiler/xla/service/call_graph.cc | 43 ++++ tensorflow/compiler/xla/service/call_graph.h | 33 +++ .../compiler/xla/service/call_graph_test.cc | 81 +++++++ .../compiler/xla/service/hlo_ordering.cc | 213 +++++++++++++----- .../compiler/xla/service/hlo_ordering.h | 28 +++ 6 files changed, 339 insertions(+), 61 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index d9060c289e..f7605cf35e 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -831,6 +831,8 @@ cc_library( ":call_graph", ":hlo", ":hlo_proto", + ":hlo_value", + ":liveness_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/xla/service/call_graph.cc b/tensorflow/compiler/xla/service/call_graph.cc index b450e0c400..c0f3bcdc22 100644 --- a/tensorflow/compiler/xla/service/call_graph.cc +++ b/tensorflow/compiler/xla/service/call_graph.cc @@ -313,6 +313,49 @@ Status CallGraph::VisitNodes(const VisitorFunction& visitor_func, return Status::OK(); } +bool CallGraph::IsFlattened() const { + for (const CallGraphNode& node : nodes_) { + if (node.context() == CallContext::kBoth) { + return false; + } + if (node.context() == CallContext::kSequential && + node.caller_callsites().size() > 1) { + return false; + } + } + return true; +} + +std::pair +CallGraph::NearestAncestorsInSameComputation(HloInstruction* a, + HloInstruction* b) const { + // Lambda which returns the next instruction in the callee->caller chain in + // the call graph. This is the unique instruction which calls the computation + // containing 'instruction'. If more than one instruction calls the + // computation containing 'instruction' or no instructions call the + // computation then nullptr is returned. + auto next_caller = [this](HloInstruction* instruction) -> HloInstruction* { + const CallGraphNode& node = GetNode(instruction->parent()); + if (node.caller_callsites().size() != 1) { + return nullptr; + } + return node.caller_callsites()[0].instruction(); + }; + + // Iterate through the callee->caller chains and find the earliest common + // element. + for (HloInstruction* a_ancestor = a; a_ancestor != nullptr; + a_ancestor = next_caller(a_ancestor)) { + for (HloInstruction* b_ancestor = b; b_ancestor != nullptr; + b_ancestor = next_caller(b_ancestor)) { + if (a_ancestor->parent() == b_ancestor->parent()) { + return {a_ancestor, b_ancestor}; + } + } + } + return {nullptr, nullptr}; +} + string CallGraph::ToString() const { string out; Appendf(&out, "Call graph for module %s:\n", module_->name().c_str()); diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h index a3297ff534..688c4085df 100644 --- a/tensorflow/compiler/xla/service/call_graph.h +++ b/tensorflow/compiler/xla/service/call_graph.h @@ -203,6 +203,39 @@ class CallGraph { return Dominates(computation, instruction->parent()); } + // Returns the nearest call graph ancestors of instructions 'a' and 'b' for + // which the ancestors are in the same computation. An instruction is an call + // graph ancestor of 'a' if the instruction calls the computation containing + // 'a' either directly or transitively. Degeneratively an instruction is an + // ancestor of itself. nullptr is returned if there is no common ancestor or + // if the caller chain of 'a' or 'b' diverges (has multiple callers) before + // the nearest common ancestor. + // + // Example: + // + // Entry computation: + // %x = Call(A, {Constant(42.0)}) + // %y = Call(B, {%x}) + // + // Computation A: + // %a = Negate(Param()) + // + // Computation B: + // %b = Exp(Param()); + // + // If called with %a and %b, this function would return (%x, %y). %x is an + // ancestor of %a, and %y is an ancestor of %b, and %x and %y are in the same + // computation. + std::pair NearestAncestorsInSameComputation( + HloInstruction* a, HloInstruction* b) const; + + // Returns whether the call graph is flattened. A call graph is flattened if + // every computation called in a sequential context (eg, kWhile or kCall) has + // zero or one callsite, and no computation is called from both a parallel and + // sequential context. The call graph of a module can be flattened with + // FlattenCallGraph. + bool IsFlattened() const; + string ToString() const; private: diff --git a/tensorflow/compiler/xla/service/call_graph_test.cc b/tensorflow/compiler/xla/service/call_graph_test.cc index 3c22871b3b..4243d37a77 100644 --- a/tensorflow/compiler/xla/service/call_graph_test.cc +++ b/tensorflow/compiler/xla/service/call_graph_test.cc @@ -97,6 +97,8 @@ TEST_F(CallGraphTest, SingletonComputation) { module->AddEntryComputation(MakeScalarComputation()); std::unique_ptr call_graph = CallGraph::Build(module.get()); EXPECT_EQ(1, call_graph->nodes().size()); + EXPECT_TRUE(call_graph->IsFlattened()); + const CallGraphNode& node = call_graph->GetNode(computation); EXPECT_EQ(computation, node.computation()); EXPECT_TRUE(node.callsites().empty()); @@ -169,6 +171,10 @@ TEST_F(CallGraphTest, SequentialComputations) { std::unique_ptr call_graph = CallGraph::Build(module.get()); EXPECT_EQ(2, call_graph->nodes().size()); + // The called computation is only called from one other computation, but there + // are multiple callsites. + EXPECT_FALSE(call_graph->IsFlattened()); + const CallGraphNode& entry_node = call_graph->GetNode(entry_computation); EXPECT_EQ(entry_computation, entry_node.computation()); EXPECT_EQ(CallContext::kSequential, entry_node.context()); @@ -206,6 +212,8 @@ TEST_F(CallGraphTest, ContextBothComputations) { std::unique_ptr call_graph = CallGraph::Build(module.get()); EXPECT_EQ(2, call_graph->nodes().size()); + EXPECT_FALSE(call_graph->IsFlattened()); + const CallGraphNode& entry_node = call_graph->GetNode(entry_computation); EXPECT_EQ(entry_computation, entry_node.computation()); EXPECT_EQ(2, entry_node.callsites().size()); @@ -273,6 +281,7 @@ TEST_F(CallGraphTest, ComplexGraph) { std::unique_ptr call_graph = CallGraph::Build(module.get()); EXPECT_EQ(5, call_graph->nodes().size()); + EXPECT_FALSE(call_graph->IsFlattened()); // Entry computation has one while instruction calling two computations // (cond_computation and a_computation). @@ -347,6 +356,78 @@ TEST_F(CallGraphTest, ComplexGraph) { EXPECT_TRUE(call_graph->Dominates(cond_computation, cond_computation)); } +TEST_F(CallGraphTest, ComplexGraphNearestAncestors) { + // Test NearestAncestorsInSameComputation on a call graph of a module with + // several computation called in various contexts. The call graph looks like: + // + // entry + // / | + // a | + // / | \ | + // b | cond + // \ | + // c + // + // Calls are made via kCall, kWhile, and kMap instructions. + auto module = CreateNewModule(); + HloComputation* cond_computation = + module->AddEmbeddedComputation(MakeConditionComputation()); + HloComputation* c_computation = + module->AddEmbeddedComputation(MakeScalarComputation()); + HloComputation* b_computation = module->AddEmbeddedComputation( + MakeMappingComputation(c_computation, /*callsites=*/1)); + HloInstruction* b_map = b_computation->root_instruction(); + + HloComputation* a_computation; + HloInstruction* a_call; + HloInstruction* a_while; + { + HloComputation::Builder builder(TestName() + ".a"); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, kScalarShape, "param0")); + a_call = builder.AddInstruction( + HloInstruction::CreateCall(kScalarShape, {param0}, c_computation)); + a_while = builder.AddInstruction(HloInstruction::CreateWhile( + kScalarShape, cond_computation, b_computation, a_call)); + a_computation = module->AddEmbeddedComputation(builder.Build()); + } + + HloComputation* entry_computation; + HloInstruction* entry_while; + { + HloComputation::Builder builder(TestName() + ".entry"); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, kScalarShape, "param0")); + entry_while = builder.AddInstruction(HloInstruction::CreateWhile( + kScalarShape, cond_computation, a_computation, param0)); + entry_computation = module->AddEntryComputation(builder.Build()); + } + + std::unique_ptr call_graph = CallGraph::Build(module.get()); + EXPECT_EQ(5, call_graph->nodes().size()); + + // Verify NearestAncestorsInSameComputation for various instructions in the + // module. + EXPECT_EQ(call_graph->NearestAncestorsInSameComputation(a_call, a_call), + std::make_pair(a_call, a_call)); + + // c_computation is called from more than one site, so + // NearestAncestorsInSameComputation bails and returns nullptrs. + std::pair null_pair = {nullptr, nullptr}; + EXPECT_EQ(call_graph->NearestAncestorsInSameComputation( + b_map, c_computation->root_instruction()), + null_pair); + + EXPECT_EQ(call_graph->NearestAncestorsInSameComputation(b_map, entry_while), + std::make_pair(entry_while, entry_while)); + EXPECT_EQ(call_graph->NearestAncestorsInSameComputation(b_map, a_call), + std::make_pair(a_while, a_call)); + EXPECT_EQ(call_graph->NearestAncestorsInSameComputation(a_while, a_call), + std::make_pair(a_while, a_call)); + EXPECT_EQ(call_graph->NearestAncestorsInSameComputation(a_while, b_map), + std::make_pair(a_while, a_while)); +} + TEST_F(CallGraphTest, VisitSingletonComputation) { // Test the call graph visitor with a call graph with a single node. auto module = CreateNewModule(); diff --git a/tensorflow/compiler/xla/service/hlo_ordering.cc b/tensorflow/compiler/xla/service/hlo_ordering.cc index 4c3ff3bdaf..08f572bb2a 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/liveness_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" @@ -31,66 +32,6 @@ limitations under the License. namespace xla { -namespace { - -// Returns the nearest call graph ancestors of instructions 'a' and 'b' for -// which the ancestors are in the same computation. An instruction is an call -// graph ancestor of 'a' if the instruction calls the computation containing 'a' -// either directly or transitively. Degeneratively an instruction is an ancestor -// of itself. nullptr is returned if there is no common ancestor or if the -// caller chain of 'a' or 'b' diverges (has multiple callers) before the nearest -// common ancestor. -// -// Example: -// -// Entry computation: -// %x = Call(A, {Constant(42.0)}) -// %y = Call(B, {%x}) -// -// Computation A: -// %a = Negate(Param()) -// -// Computation B: -// %b = Exp(Param()); -// -// If called with %a and %b, this function would return (%x, %y). %x is an -// ancestor of %a, and %y is an ancestor of %b, and %x and %y are in the same -// computation. -std::pair -GetNearestCallGraphAncestorsInSameComputation(const HloInstruction* a, - const HloInstruction* b, - const CallGraph& call_graph) { - // Lambda which returns the next instruction in the callee->caller chain in - // the call graph. This is the unique instruction which calls the computation - // containing 'instruction'. If more than one instruction calls the - // computation containing 'instruction' or no instructions call the - // computation then nullptr is returned. - auto next_caller = - [&call_graph]( - const HloInstruction* instruction) -> const HloInstruction* { - const CallGraphNode& node = call_graph.GetNode(instruction->parent()); - if (node.caller_callsites().size() != 1) { - return nullptr; - } - return node.caller_callsites()[0].instruction(); - }; - - // Iterate through the callee->caller chains and find the earliest common - // element. - for (const HloInstruction* a_ancestor = a; a_ancestor != nullptr; - a_ancestor = next_caller(a_ancestor)) { - for (const HloInstruction* b_ancestor = b; b_ancestor != nullptr; - b_ancestor = next_caller(b_ancestor)) { - if (a_ancestor->parent() == b_ancestor->parent()) { - return {a_ancestor, b_ancestor}; - } - } - } - return {nullptr, nullptr}; -} - -} // namespace - bool HloOrdering::ExecutesBefore(const HloInstruction* a, const HloInstruction* b) const { // 'a' and 'b' may be in different computations. In this case, find the @@ -100,7 +41,8 @@ bool HloOrdering::ExecutesBefore(const HloInstruction* a, const HloInstruction* a_ancestor; const HloInstruction* b_ancestor; std::tie(a_ancestor, b_ancestor) = - GetNearestCallGraphAncestorsInSameComputation(a, b, *call_graph_); + call_graph_->NearestAncestorsInSameComputation( + const_cast(a), const_cast(b)); if (a_ancestor == nullptr) { // Ancestors in a common computation could not be found so consider the @@ -127,6 +69,155 @@ bool HloOrdering::ExecutesBefore(const HloInstruction* a, return ExecutesBeforeInSameComputation(a_ancestor, b_ancestor); } +bool HloOrdering::IsDefinedBefore(const HloValue& a, const HloValue& b) const { + // If 'b' is an entry param then 'a' cannot be defined before 'b' because 'b' + // is live into the module. + const HloModule* module = b.defining_instruction()->parent()->parent(); + if (b.defining_instruction()->parent() == module->entry_computation() && + b.defining_instruction()->opcode() == HloOpcode::kParameter) { + return false; + } + + // Phi values require special handling. Because XLA does not have a phi + // instruction, the definition instruction of the phis values are + // placeholders: either the subcomputation parameter (body or condition) or + // the while instruction. However, the program point where these values are + // logically defined does not necessarily coincide exactly with program point + // of these place-holder instructions. So we explicitly define the following + // order for phi values: + // + // body/condition parameter phi: + // Defined before all values defined in its computation excepting other + // phis. + // + // while phi: + // defined after all values defined in the condition or body. + // + auto is_body_or_condition_phi = [](const HloValue& v) { + return v.is_phi() && + v.defining_instruction()->opcode() == HloOpcode::kParameter; + }; + if (is_body_or_condition_phi(a) && !is_body_or_condition_phi(b) && + call_graph_->InstructionIsNestedIn(b.defining_instruction(), + a.defining_instruction()->parent())) { + return true; + } + if (is_body_or_condition_phi(b) && + call_graph_->InstructionIsNestedIn(a.defining_instruction(), + b.defining_instruction()->parent())) { + return false; + } + + // If 'b' is a while phi and 'a' is in the body or condition, then 'a' + // executes before 'b'. + if (b.is_phi() && b.defining_instruction()->opcode() == HloOpcode::kWhile && + (call_graph_->InstructionIsNestedIn( + a.defining_instruction(), b.defining_instruction()->while_body()) || + call_graph_->InstructionIsNestedIn( + a.defining_instruction(), + b.defining_instruction()->while_condition()))) { + return true; + } + + return ExecutesBefore(a.defining_instruction(), b.defining_instruction()); +} + +/* static */ +bool HloOrdering::UseIsBeforeValueDefinition(const HloUse& use, + const HloValue& value) const { + VLOG(4) << "UseIsBeforeValueDefinition(use=" << use + << ", value=" << value.ToShortString() << ")"; + if (ExecutesBefore(use.instruction, value.defining_instruction())) { + VLOG(4) << " use instruction executes before value-defining instruction"; + return true; + } + + // If the use is at the instruction where the value is defined, then the use + // is before the def if the instruction allows buffer sharing (in place + // computation). + if (use.instruction == value.defining_instruction() && + CanShareOperandBufferWithUser( + use.instruction->mutable_operand(use.operand_number), + use.operand_index, value.defining_instruction(), + value.defining_index())) { + VLOG(4) << " use is value def, and instruction can share use buffer"; + return true; + } + + // The use at a while is an input to a phi, and logically occurs before values + // are defined in the body or condition computations. + if (use.instruction->opcode() == HloOpcode::kWhile) { + const HloInstruction* xla_while = use.instruction; + if (call_graph_->InstructionIsNestedIn(value.defining_instruction(), + xla_while->while_body()) || + call_graph_->InstructionIsNestedIn(value.defining_instruction(), + xla_while->while_condition())) { + VLOG(4) << " use is while " << use.instruction->name() + << " and def is in condition or body"; + return true; + } + } + + // Similarly if the value is defined at a while, it logically occurs after any + // uses in the body or condition computations. + if (value.defining_instruction()->opcode() == HloOpcode::kWhile) { + CHECK(value.is_phi()); + const HloInstruction* xla_while = value.defining_instruction(); + if (call_graph_->InstructionIsNestedIn(use.instruction, + xla_while->while_body()) || + call_graph_->InstructionIsNestedIn(use.instruction, + xla_while->while_condition())) { + VLOG(4) << " value is while " << value.defining_instruction()->name() + << " and use is in condition or body"; + return true; + } + } + VLOG(4) << " use is not before while"; + return false; +} + +bool HloOrdering::LiveRangeStrictlyBefore(const HloValue& a, + const HloValue& b) const { + VLOG(4) << "LiveRangeStrictlyBefore(a = " << a.ToShortString() + << ", b = " << b.ToShortString() << ")"; + if (!IsDefinedBefore(a, b)) { + VLOG(4) << "a not defined before b"; + return false; + } + + // Live-out values from the module can never have ranges strictly before any + // other value. + if (a.live_out_of_module()) { + VLOG(4) << "a is live out of module"; + return false; + } + + // Live-out values of computations can never have ranges strictly before any + // other value in the computation (including values nested in + // subcomputations). + if (a.live_out_of_computation() && + call_graph_->InstructionIsNestedIn(b.defining_instruction(), + a.defining_instruction()->parent())) { + VLOG(4) << "a is live out of computation containing b"; + return false; + } + + // All uses of 'a' must be before 'b' is defined. + for (const HloUse& use : a.uses()) { + if (!UseIsBeforeValueDefinition(use, b)) { + VLOG(4) << "use of a (" << use << ") not before b is defined"; + return false; + } + } + + return true; +} + +bool HloOrdering::MayInterfere(const HloValue& a, const HloValue& b) const { + // Buffers without disjoint liveness may interfere. + return !LiveRangeStrictlyBefore(a, b) && !LiveRangeStrictlyBefore(b, a); +} + HloOrderingProto HloOrdering::ToProto() const { HloOrderingProto proto; for (const auto& computation : module_->computations()) { diff --git a/tensorflow/compiler/xla/service/hlo_ordering.h b/tensorflow/compiler/xla/service/hlo_ordering.h index 130431f280..efb5fca188 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering.h +++ b/tensorflow/compiler/xla/service/hlo_ordering.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_value.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/gtl/flatmap.h" @@ -41,11 +42,30 @@ class HloOrdering { // not reflexive, that is, an instruction does not execute before itself. bool ExecutesBefore(const HloInstruction* a, const HloInstruction* b) const; + // Returns whether the value 'a' is defined before the value 'b' under the + // given ordering. + bool IsDefinedBefore(const HloValue& a, const HloValue& b) const; + + // Returns whether the given use is before the given value definition under + // the given ordering. + bool UseIsBeforeValueDefinition(const HloUse& use, + const HloValue& value) const; + // Returns whether the given values interfere. Two values interfere if they + // may both be simultaneously live. + bool MayInterfere(const HloValue& a, const HloValue& b) const; + + // Returns true if the live range of the given value 'a' is strictly before + // the live range of value 'b' using the given HLO ordering. + bool LiveRangeStrictlyBefore(const HloValue& a, const HloValue& b) const; + // Returns the sequential instruction order for the given computation, or // nullptr if the computation does not have a sequential ordering. virtual const std::vector* SequentialOrder( const HloComputation& computation) const = 0; + // Return the call graph of the module used to compute ordering. + const CallGraph& call_graph() const { return *call_graph_; } + virtual string ToString() const = 0; // Returns the serialized representation of this ordering. @@ -81,6 +101,14 @@ class PredecessorHloOrdering : public HloOrdering { return nullptr; } + HloReachabilityMap& reachability_map(const HloComputation* computation) { + return *predecessors_.at(computation); + } + const HloReachabilityMap& reachability_map( + const HloComputation* computation) const { + return *predecessors_.at(computation); + } + protected: explicit PredecessorHloOrdering(const HloModule* module); string ToStringHelper(const string& name) const; -- GitLab From b058d2b1a5e3a2861ca803e372161e3edd3ad4a6 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Fri, 25 Aug 2017 15:24:40 -0700 Subject: [PATCH 002/294] Cross-compile OpenBLAS for Pi Zero PiperOrigin-RevId: 166533251 --- tensorflow/core/kernels/gemm_functors.h | 18 +++-- .../tools/ci_build/pi/build_raspberry_pi.sh | 24 ++++++- third_party/toolchains/cpus/arm/CROSSTOOL.tpl | 4 +- .../toolchains/cpus/arm/build_raspberry_pi.sh | 72 ------------------- 4 files changed, 38 insertions(+), 80 deletions(-) delete mode 100755 third_party/toolchains/cpus/arm/build_raspberry_pi.sh diff --git a/tensorflow/core/kernels/gemm_functors.h b/tensorflow/core/kernels/gemm_functors.h index 7c224bcab6..4b30c1f17f 100644 --- a/tensorflow/core/kernels/gemm_functors.h +++ b/tensorflow/core/kernels/gemm_functors.h @@ -33,11 +33,20 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" +// Apple provides an optimized BLAS library that is better than Eigen for their +// devices, so use that if possible. #if defined(__APPLE__) && defined(USE_GEMM_FOR_CONV) #include -#define USE_ACCELERATE_GEMM +#define USE_CBLAS_GEMM #endif // __APPLE__ +// Older Raspberry Pi systems don't have NEON SIMD acceleration, so Eigen falls +// back to scalar code, but OpenBLAS has much faster support so prefer that. +#if defined(RASPBERRY_PI) && defined(USE_GEMM_FOR_CONV) && defined(USE_OPENBLAS) +#include +#define USE_CBLAS_GEMM +#endif + // A readable but slow implementation of matrix multiplication, useful for // debugging and understanding the algorithm. Use instead of FastGemmFunctor in // the Im2ColConvFunctor template definition inside the op registration to @@ -94,9 +103,8 @@ class FastGemmFunctor { } }; -// If we have Apple's Accelerate framework, use their implementation of GEMM to -// get a performance boost for float. -#if defined(USE_ACCELERATE_GEMM) +// If we have a fast CBLAS library, use its implementation through a wrapper. +#if defined(USE_CBLAS_GEMM) template <> class FastGemmFunctor { public: @@ -107,4 +115,4 @@ class FastGemmFunctor { lda, b, ldb, 0.0f, c, ldc); } }; -#endif // USE_ACCELERATE_GEMM +#endif // USE_CBLAS_GEMM diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index 9e6cfc017e..f53bfb59ff 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -47,8 +47,30 @@ sed -i 's/ca7beac153d4059c02c8fc59816c82d54ea47fe58365e8aded4082ded0b820c4/a34b2 sudo sed -i 's/define CURL_SIZEOF_LONG 8/define CURL_SIZEOF_LONG 4/g' /usr/include/curl/curlbuild.h sudo sed -i 's/define CURL_SIZEOF_CURL_OFF_T 8/define CURL_SIZEOF_CURL_OFF_T 4/g' /usr/include/curl/curlbuild.h +# Build the OpenBLAS library, which is faster than Eigen on the Pi Zero/One. +# TODO(petewarden) - It would be nicer to move this into the main Bazel build +# process if we can maintain a build file for this. +mkdir toolchain +cd toolchain +curl -L https://github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz -o toolchain.tar.gz +tar xzf toolchain.tar.gz +mv tools-0e906ebc527eab1cdbf7adabff5b474da9562e9f/ tools +cd .. + +CROSSTOOL_CC=$(pwd)/toolchain/tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf/bin/arm-linux-gnueabihf-gcc + +git clone https://github.com/xianyi/OpenBLAS openblas +cd openblas +make CC=${CROSSTOOL_CC} FC=${CROSSTOOL_CC} HOSTCC=gcc TARGET=ARMV6 +make PREFIX=$(pwd)/toolchain/openblas/ install +cd .. + if [[ $1 == "PI_ONE" ]]; then - PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp" + PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp + --copt=-DUSE_GEMM_FOR_CONV --copt=-DUSE_OPENBLAS + --copt=-isystem=$(pwd)/toolchain/openblas/include/ + --linkopt=-L$(pwd)/toolchain/openblas/lib/ + --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" else PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 diff --git a/third_party/toolchains/cpus/arm/CROSSTOOL.tpl b/third_party/toolchains/cpus/arm/CROSSTOOL.tpl index 6753476c15..04e399bed1 100644 --- a/third_party/toolchains/cpus/arm/CROSSTOOL.tpl +++ b/third_party/toolchains/cpus/arm/CROSSTOOL.tpl @@ -77,8 +77,8 @@ toolchain { cxx_builtin_include_directory: "%{ARM_COMPILER_PATH}%/lib/gcc/arm-linux-gnueabihf/4.9.3/include-fixed" cxx_builtin_include_directory: "%{ARM_COMPILER_PATH}%/local_include" cxx_builtin_include_directory: "/usr/include" - - cxx_flag: "-std=c++11" + cxx_builtin_include_directory: "/workspace/toolchain/openblas/include/" + cxx_flag: "-std=c++11" # The cxx_builtin_include_directory directives don't seem to be adding these, so # explicitly set them as flags. There's a query to the Bazel team outstanding about # why this is necessary. diff --git a/third_party/toolchains/cpus/arm/build_raspberry_pi.sh b/third_party/toolchains/cpus/arm/build_raspberry_pi.sh deleted file mode 100755 index c9fe0bc771..0000000000 --- a/third_party/toolchains/cpus/arm/build_raspberry_pi.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -set -e - -# By default this builds packages for the Pi Two and Three only, since the NEON support -# this allows makes calculations many times faster. To support the Pi One or Zero, pass -# PI_ONE as the first argument to the script, for example: -# third_party/toolchains/cpus/arm/build_raspberry_pi.sh PI_ONE -# -# To install the cross-compilation support for Python this script needs on Ubuntu Trusty, run -# something like these steps, after backing up your original /etc/apt/sources.list file: -# -# dpkg --add-architecture armhf -# echo 'deb [arch=armhf] http://ports.ubuntu.com/ trusty main restricted universe multiverse' >> /etc/apt/sources.list.d/armhf.list -# echo 'deb [arch=armhf] http://ports.ubuntu.com/ trusty-updates main restricted universe multiverse' >> /etc/apt/sources.list.d/armhf.list -# echo 'deb [arch=armhf] http://ports.ubuntu.com/ trusty-security main restricted universe multiverse' >> /etc/apt/sources.list.d/armhf.list -# echo 'deb [arch=armhf] http://ports.ubuntu.com/ trusty-backports main restricted universe multiverse' >> /etc/apt/sources.list.d/armhf.list -# sed -i 's#deb http://archive.ubuntu.com/ubuntu/#deb [arch=amd64] http://archive.ubuntu.com/ubuntu/#g' /etc/apt/sources.list -# apt-get update -# apt-get install -y libpython-all-dev:armhf -# -# Make sure you have an up to date version of the Bazel build tool installed too. - -yes '' | ./configure - -if [[ $1 == "PI_ONE" ]]; then - PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp" - echo "Building for the Pi One/Zero, with no NEON support" -else - PI_COPTS='--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 - --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 - --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 - --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8' - echo "Building for the Pi Two/Three, with NEON acceleration" -fi - -bazel build -c opt ${PI_COPTS} \ - --copt=-funsafe-math-optimizations --copt=-ftree-vectorize \ - --copt=-fomit-frame-pointer --cpu=armeabi \ - --crosstool_top=@local_config_arm_compiler//:toolchain \ - --verbose_failures \ - //tensorflow/tools/benchmark:benchmark_model \ - //tensorflow/tools/pip_package:build_pip_package - -TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXX) -echo "Final outputs will go to ${TMPDIR}" - -# Build a universal wheel. -BDIST_OPTS="--universal" \ - bazel-bin/tensorflow/tools/pip_package/build_pip_package "${TMPDIR}" - -OLD_FN=$(ls "${TMPDIR}" | grep \.whl) -SUB='s/tensorflow-([^-]+)-([^-]+)-.*/tensorflow-\1-\2-none-any.whl/; print' -NEW_FN=$(echo "${OLD_FN}" | perl -ne "${SUB}") -mv "${TMPDIR}/${OLD_FN}" "${TMPDIR}/${NEW_FN}" -cp bazel-bin/tensorflow/tools/benchmark/benchmark_model "${TMPDIR}" - -echo "Output can be found here:" -find "${TMPDIR}" -- GitLab From aee007aabaa2e114454e78aa6381dc8bfa4dda5f Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Fri, 25 Aug 2017 16:03:19 -0700 Subject: [PATCH 003/294] [XLA] Support ReduceWindow in HloEvaluator. PiperOrigin-RevId: 166538063 --- .../compiler/xla/service/hlo_evaluator.cc | 89 +++++++++++++- .../xla/service/hlo_evaluator_test.cc | 110 ++++++++++++++++++ 2 files changed, 198 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 3cb8f10dd0..31cb64d879 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -900,6 +900,93 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); }; + Status HandleReduceWindow(HloInstruction* reduce_window, + HloInstruction* operand, const Window& window, + HloComputation* function) override { + TF_ASSIGN_OR_RETURN( + auto inferred_return_shape, + ShapeInference::InferReduceWindowShape( + /*operand_shape=*/reduce_window->operand(0)->shape(), + /*init_value=*/reduce_window->operand(1)->shape(), window, + /*to_apply_shape=*/function->ComputeProgramShape())); + TF_RET_CHECK( + ShapeUtil::Compatible(reduce_window->shape(), inferred_return_shape)) + << "return shape is set to: " + << ShapeUtil::HumanString(reduce_window->shape()) + << "but is inferred to be: " + << ShapeUtil::HumanString(inferred_return_shape); + + const Literal& operand_literal = + parent_->GetEvaluatedLiteralFor(reduce_window->operand(0)); + VLOG(3) << "HandleReduceWindow arg_literal: " << operand_literal.ToString(); + const Literal& init_literal = + parent_->GetEvaluatedLiteralFor(reduce_window->operand(1)); + VLOG(3) << "HandleReduceWindow init_literal: " << init_literal.ToString(); + TF_RET_CHECK(ShapeUtil::IsScalar(init_literal.shape())); + auto init_scalar = init_literal.Get({}); + + auto result = Literal::CreateFromShape(reduce_window->shape()); + + // Creates a Shape object from window, for iteration below. + std::vector window_dimension_sizes; + for (const auto& window_dimension : window.dimensions()) { + window_dimension_sizes.push_back(window_dimension.size()); + } + const Shape window_shape = ShapeUtil::MakeShape( + operand->shape().element_type(), window_dimension_sizes); + + DimensionVector window_index(window.dimensions_size()); + DimensionVector operand_index(ShapeUtil::Rank(operand_literal.shape())); + + // For each resulting dimension, calculate and assign computed value. + TF_RETURN_IF_ERROR(result->Populate( + [&](tensorflow::gtl::ArraySlice output_index) { + ReturnT result_val = init_scalar; + + std::fill(window_index.begin(), window_index.end(), 0); + std::fill(operand_index.begin(), operand_index.end(), 0); + + do { + // Set curr_val to 0 if out of bound (padded). + ReturnT curr_val = static_cast(0); + bool out_of_bound = false; + for (int i = 0; i < operand_index.size(); ++i) { + operand_index[i] = + output_index[i] * window.dimensions(i).stride() + + window_index[i] - window.dimensions(i).padding_low(); + if (operand_index[i] < 0 || + operand_index[i] >= operand_literal.shape().dimensions(i)) { + out_of_bound = true; + break; + } + } + if (!out_of_bound) { + curr_val = operand_literal.Get(operand_index); + } + // Evaluate computation with specified literal operands. + const auto curr_val_literal = Literal::CreateR0(curr_val); + const auto result_val_literal = + Literal::CreateR0(result_val); + const std::vector args = {curr_val_literal.get(), + result_val_literal.get()}; + // We need a new visitor for each evaluation, so that the same + // computation can be visited more than once (with different + // inputs). + HloEvaluator embedded_evaluator; + std::unique_ptr computed_result = + embedded_evaluator.Evaluate(*function, args) + .ConsumeValueOrDie(); + + result_val = computed_result->Get({}); + } while (IndexUtil::BumpIndices(window_shape, &window_index)); + + return result_val; + })); + + parent_->evaluated_[reduce_window] = std::move(result); + return Status::OK(); + }; + Status HandleSlice(HloInstruction* slice, HloInstruction* operand) override { const Shape& shape = slice->shape(); TF_ASSIGN_OR_RETURN(auto inferred_return_shape, @@ -1070,7 +1157,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } HloEvaluator* parent_; -}; +}; // namespace xla HloEvaluator::HloEvaluator() { typed_visitors_[PRED] = MakeUnique>(this); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 1bf483b320..9d33236df5 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -1097,6 +1097,116 @@ TEST_F(HloEvaluatorTest, ReduceAdd) { LiteralTestUtil::ExpectEqual(*expected, *result); } +TEST_F(HloEvaluatorTest, ReduceWindowMax) { + HloComputation::Builder b(TestName()); + + // arg: + // f32[2,3] { + // { 1, 2, 3 }, + // { 5, 6, 7 }, + // } + auto arg_array = MakeUnique>(2, 3); + arg_array->FillUnique(1.0f); + auto arg_literal = Literal::CreateR2FromArray2D(*arg_array); + + HloInstruction* arg_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal))); + + auto init_value = b.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(0.f))); + + HloComputation::Builder max_computation("max"); + Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + auto param_lhs = max_computation.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "lhs")); + auto param_rhs = max_computation.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "rhs")); + max_computation.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kMaximum, param_lhs, param_rhs)); + HloModule module(TestName()); + auto max_func = module.AddEmbeddedComputation(max_computation.Build()); + + Window window; + WindowDimension dim; + dim.set_size(2); + dim.set_stride(1); + dim.set_padding_low(0); + dim.set_padding_high(0); + dim.set_window_dilation(1); + dim.set_base_dilation(1); + *window.add_dimensions() = dim; + *window.add_dimensions() = dim; + + Shape shape = ShapeUtil::MakeShape(F32, {1, 2}); + b.AddInstruction(HloInstruction::CreateReduceWindow( + shape, arg_instruction, init_value, window, max_func)); + + auto computation = module.AddEntryComputation(b.Build()); + std::unique_ptr result = + evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie(); + + auto expected = Literal::CreateR2({{6, 7}}); + LiteralTestUtil::ExpectEqual(*expected, *result); +} + +TEST_F(HloEvaluatorTest, ReduceWindowAdd) { + HloComputation::Builder b(TestName()); + + // arg: + // f32[2,3] { + // { 1, 2, 3 }, + // { 5, 6, 7 }, + // } + auto arg_array = MakeUnique>(2, 3); + arg_array->FillUnique(1.0f); + auto arg_literal = Literal::CreateR2FromArray2D(*arg_array); + + HloInstruction* arg_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal))); + + auto init_value = b.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(0.f))); + + HloComputation::Builder add_computation("add"); + Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + auto param_lhs = add_computation.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "lhs")); + auto param_rhs = add_computation.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "rhs")); + add_computation.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kAdd, param_lhs, param_rhs)); + HloModule module(TestName()); + auto add_func = module.AddEmbeddedComputation(add_computation.Build()); + + Window window; + WindowDimension dim; + dim.set_size(1); + dim.set_stride(1); + dim.set_padding_low(0); + dim.set_padding_high(0); + dim.set_window_dilation(1); + dim.set_base_dilation(1); + *window.add_dimensions() = dim; + dim.set_size(2); + dim.set_stride(1); + dim.set_padding_low(1); + dim.set_padding_high(0); + dim.set_window_dilation(1); + dim.set_base_dilation(1); + *window.add_dimensions() = dim; + + Shape shape = ShapeUtil::MakeShape(F32, {2, 3}); + b.AddInstruction(HloInstruction::CreateReduceWindow( + shape, arg_instruction, init_value, window, add_func)); + + auto computation = module.AddEntryComputation(b.Build()); + std::unique_ptr result = + evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie(); + + auto expected = Literal::CreateR2({{1, 3, 5}, {5, 11, 13}}); + LiteralTestUtil::ExpectEqual(*expected, *result); +} + TEST_F(HloEvaluatorTest, StridedSlice) { HloComputation::Builder b(TestName()); -- GitLab From 4a0decf95f4b48429424cf824c686a5b9fdff84b Mon Sep 17 00:00:00 2001 From: Ali Yahya Date: Fri, 25 Aug 2017 16:12:18 -0700 Subject: [PATCH 004/294] Enables eager mode for a few outstanding tests for ResourceVariable now that the requisite fixes are in. PiperOrigin-RevId: 166539119 --- .../resource_variable_ops_test.py | 45 +++++++++---------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index cd89504051..e08130c103 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -145,17 +145,17 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): resource_variable_ops.var_is_initialized_op(abc.handle)), True) - # TODO(alive): fix bug in convert_to_tensor; get this to work in Eager. + @test_util.run_in_graph_and_eager_modes() def testConstraintArg(self): constraint = lambda x: x v = resource_variable_ops.ResourceVariable( - initial_value=lambda: 1, constraint=constraint) + initial_value=lambda: 1, constraint=constraint, name="var0") self.assertEqual(v.constraint, constraint) constraint = 0 with self.assertRaises(ValueError): v = resource_variable_ops.ResourceVariable( - initial_value=lambda: 1, constraint=constraint) + initial_value=lambda: 1, constraint=constraint, name="var1") # TODO(alive): how should this work in Eager mode? def testInitFn(self): @@ -165,18 +165,17 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.assertEqual(v.handle.op.colocation_groups(), v.initializer.inputs[1].op.colocation_groups()) - # TODO(alive): fix bug in convert_to_tensor; get this to work in Eager. + @test_util.run_in_graph_and_eager_modes() def testInitFnDtype(self): - with self.test_session(): - v = resource_variable_ops.ResourceVariable( - initial_value=lambda: 1, dtype=dtypes.float32) - self.assertEqual(dtypes.float32, v.value().dtype) + v = resource_variable_ops.ResourceVariable( + initial_value=lambda: 1, dtype=dtypes.float32) + self.assertEqual(dtypes.float32, v.value().dtype) - # TODO(alive): fix bug in convert_to_tensor; get this to work in Eager. + @test_util.run_in_graph_and_eager_modes() def testInitFnNoDtype(self): - with self.test_session(): - v = resource_variable_ops.ResourceVariable(initial_value=lambda: 1) - self.assertEqual(dtypes.int32, v.value().dtype) + v = resource_variable_ops.ResourceVariable(initial_value=lambda: 1, + name="var2") + self.assertEqual(dtypes.int32, v.value().dtype) @test_util.run_in_graph_and_eager_modes() def testInitializeAllVariables(self): @@ -209,7 +208,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): with self.test_session(): init_value = np.reshape(np.arange(np.power(4, 3)), (4, 4, 4)) v = resource_variable_ops.ResourceVariable( - constant_op.constant(init_value, dtype=dtypes.int32), name="var0") + constant_op.constant(init_value, dtype=dtypes.int32), name="var3") self.evaluate(variables.global_variables_initializer()) value = self.evaluate(v.sparse_read([0, 3, 1, 2])) @@ -293,18 +292,18 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() def testSharedName(self): - v = resource_variable_ops.ResourceVariable(300.0, name="var1") + v = resource_variable_ops.ResourceVariable(300.0, name="var4") self.evaluate(variables.global_variables_initializer()) w = resource_variable_ops.var_handle_op( - dtype=v.dtype.base_dtype, shape=v.get_shape(), shared_name="var1") + dtype=v.dtype.base_dtype, shape=v.get_shape(), shared_name="var4") w_read = resource_variable_ops.read_variable_op(w, v.dtype.base_dtype) self.assertEqual(300.0, self.evaluate(w_read)) x = resource_variable_ops.var_handle_op( - dtype=v.dtype.base_dtype, shape=v.get_shape(), shared_name="var2") + dtype=v.dtype.base_dtype, shape=v.get_shape(), shared_name="var5") if context.in_graph_mode(): - with self.assertRaisesOpError("Resource .*/var2/.* does not exist"): + with self.assertRaisesOpError("Resource .*/var5/.* does not exist"): x_read = resource_variable_ops.read_variable_op(x, v.dtype.base_dtype) self.evaluate(x_read) else: @@ -315,11 +314,11 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() def testSharedNameWithNamescope(self): with ops.name_scope("foo"): - v = resource_variable_ops.ResourceVariable(300.0, name="var3") + v = resource_variable_ops.ResourceVariable(300.0, name="var6") self.evaluate(variables.global_variables_initializer()) w = resource_variable_ops.var_handle_op( - dtype=v.dtype.base_dtype, shape=v.get_shape(), shared_name="foo/var3") + dtype=v.dtype.base_dtype, shape=v.get_shape(), shared_name="foo/var6") w_read = resource_variable_ops.read_variable_op(w, v.dtype.base_dtype) self.assertEqual(300.0, self.evaluate(w_read)) @@ -364,13 +363,13 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): constraint = lambda x: x with ops.name_scope("foo"): v = resource_variable_ops.ResourceVariable( - name="var5", + name="var7", initial_value=init, caching_device="cpu:0", constraint=constraint) # Test properties self.assertEqual(dtypes.int32, v.dtype) - self.assertEqual("foo/var5:0", v.name) + self.assertEqual("foo/var7:0", v.name) self.assertAllEqual([10, 20, 35], v.shape.as_list()) self.assertAllEqual(init.device, v.device) self.assertTrue(isinstance(v.handle, ops.EagerTensor)) @@ -381,8 +380,8 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): # Callable init. callable_init = lambda: init * 2 v2 = resource_variable_ops.ResourceVariable( - initial_value=callable_init, name="var6") - self.assertEqual("var6:0", v2.name) + initial_value=callable_init, name="var7") + self.assertEqual("var7:0", v2.name) self.assertAllEqual(2 * init.numpy(), v2.read_value().numpy()) # Test assign_add. -- GitLab From f1152a61faba3cba879203d1319930d8240599ea Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 25 Aug 2017 16:22:38 -0700 Subject: [PATCH 005/294] Internal change. PiperOrigin-RevId: 166540365 --- tensorflow/python/kernel_tests/cwise_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index f95d9833d8..18801f6158 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -1956,7 +1956,7 @@ class ComplexMakeRealImagTest(test.TestCase): with self.test_session(use_gpu=use_gpu) as sess: inx = ops.convert_to_tensor(cplx) tf_angle = math_ops.angle(inx) - tf_angle_val = sess.run([tf_angle]) + tf_angle_val = sess.run(tf_angle) self.assertAllEqual(np_angle, tf_angle_val) self.assertShapeEqual(np_angle, tf_angle) -- GitLab From 8dac0eb067662ba69a7eb72e640d8ff8d146e5c9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 Aug 2017 17:05:17 -0700 Subject: [PATCH 006/294] SQL reader op is still in progress. Currently only supports sqlite3 and only supports queries of text-valued columns. PiperOrigin-RevId: 166544839 --- tensorflow/contrib/cmake/CMakeLists.txt | 4 + .../contrib/cmake/external/sqlite.cmake | 72 +++++++ .../cmake/patches/sqlite/CMakeLists.txt | 28 +++ .../contrib/data/python/kernel_tests/BUILD | 15 ++ .../kernel_tests/sql_dataset_op_test.py | 188 ++++++++++++++++++ .../contrib/data/python/ops/dataset_ops.py | 40 ++++ tensorflow/core/kernels/BUILD | 24 +++ tensorflow/core/kernels/sql/driver_manager.cc | 34 ++++ tensorflow/core/kernels/sql/driver_manager.h | 41 ++++ .../core/kernels/sql/query_connection.h | 67 +++++++ .../kernels/sql/sqlite_query_connection.cc | 121 +++++++++++ .../kernels/sql/sqlite_query_connection.h | 49 +++++ tensorflow/core/kernels/sql_dataset_ops.cc | 154 ++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 16 ++ tensorflow/workspace.bzl | 8 + third_party/sqlite.BUILD | 16 ++ 16 files changed, 877 insertions(+) create mode 100644 tensorflow/contrib/cmake/external/sqlite.cmake create mode 100644 tensorflow/contrib/cmake/patches/sqlite/CMakeLists.txt create mode 100644 tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py create mode 100644 tensorflow/core/kernels/sql/driver_manager.cc create mode 100644 tensorflow/core/kernels/sql/driver_manager.h create mode 100644 tensorflow/core/kernels/sql/query_connection.h create mode 100644 tensorflow/core/kernels/sql/sqlite_query_connection.cc create mode 100644 tensorflow/core/kernels/sql/sqlite_query_connection.h create mode 100644 tensorflow/core/kernels/sql_dataset_ops.cc create mode 100644 third_party/sqlite.BUILD diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index f6a47d26c8..422df3063e 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -125,6 +125,7 @@ include(nsync) include(protobuf) include(re2) include(cub) +include(sqlite) if (tensorflow_BUILD_CC_TESTS) include(googletest) endif() @@ -142,6 +143,7 @@ set(tensorflow_EXTERNAL_LIBRARIES ${nsync_STATIC_LIBRARIES} ${protobuf_STATIC_LIBRARIES} ${re2_STATIC_LIBRARIES} + ${sqlite_STATIC_LIBRARIES} ) set(tensorflow_EXTERNAL_DEPENDENCIES zlib_copy_headers_to_destination @@ -159,6 +161,7 @@ set(tensorflow_EXTERNAL_DEPENDENCIES cub fft2d re2 + sqlite_copy_headers_to_destination ) include_directories( @@ -180,6 +183,7 @@ include_directories( ${nsync_INCLUDE_DIR} ${PROTOBUF_INCLUDE_DIRS} ${re2_INCLUDE_DIR} + ${sqlite_INCLUDE_DIR} ) if(tensorflow_ENABLE_SSL_SUPPORT) diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake new file mode 100644 index 0000000000..a8809d4a4a --- /dev/null +++ b/tensorflow/contrib/cmake/external/sqlite.cmake @@ -0,0 +1,72 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +include (ExternalProject) + +set(sqlite_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/sqlite) +set(sqlite_URL http://www.sqlite.org/2017/sqlite-amalgamation-3200000.zip) +set(sqlite_HASH SHA256=208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4) +set(sqlite_BUILD ${CMAKE_CURRENT_BINARY_DIR}/sqlite/src/sqlite) +set(sqlite_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/sqlite/install) + +if(WIN32) + set(sqlite_STATIC_LIBRARIES ${sqlite_INSTALL}/lib/sqlite.lib) +else() + set(sqlite_STATIC_LIBRARIES ${sqlite_INSTALL}/lib/sqlite.a) +endif() + +set(sqlite_HEADERS + "${sqlite_BUILD}/sqlite3.h" +) + +if (WIN32) + ExternalProject_Add(sqlite + PREFIX sqlite + URL ${sqlite_URL} + URL_HASH ${sqlite_HASH} + PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/sqlite/CMakeLists.txt ${sqlite_BUILD} + INSTALL_DIR ${sqlite_INSTALL} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_INSTALL_PREFIX:STRING=${sqlite_INSTALL} + ) + +else() + ExternalProject_Add(sqlite + PREFIX sqlite + URL ${sqlite_URL} + URL_HASH ${sqlite_HASH} + INSTALL_DIR ${sqlite_INSTALL} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + CFLAGS=-fPIC + ) + +endif() + +# put sqlite includes in the directory where they are expected +add_custom_target(sqlite_create_destination_dir + COMMAND ${CMAKE_COMMAND} -E make_directory ${sqlite_INCLUDE_DIR} + DEPENDS sqlite) + +add_custom_target(sqlite_copy_headers_to_destination + DEPENDS sqlite_create_destination_dir) + +foreach(header_file ${sqlite_HEADERS}) + add_custom_command(TARGET sqlite_copy_headers_to_destination PRE_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${header_file} ${sqlite_INCLUDE_DIR}) +endforeach() \ No newline at end of file diff --git a/tensorflow/contrib/cmake/patches/sqlite/CMakeLists.txt b/tensorflow/contrib/cmake/patches/sqlite/CMakeLists.txt new file mode 100644 index 0000000000..e67792afd2 --- /dev/null +++ b/tensorflow/contrib/cmake/patches/sqlite/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 2.8.3) + +project(sqlite) + +set(SQLITE_SRCS + "sqlite3.c" +) + +set(SQLITE_INCLUDES + "sqlite3.h" +) + +include_directories("${CMAKE_CURRENT_SOURCE_DIR}") + +add_library(sqlite ${SQLITE_SRCS}) + +# C++11 +target_compile_features(sqlite PRIVATE + cxx_rvalue_references +) + +install(TARGETS sqlite + LIBRARY DESTINATION lib COMPONENT RuntimeLibraries + ARCHIVE DESTINATION lib COMPONENT Development) + +foreach(SQLITE_INCLUDE ${SQLITE_INCLUDES}) + install(FILES ${SQLITE_INCLUDE} DESTINATION include COMPONENT Development) +endforeach() diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index ae87a60c78..fb2740ffef 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -222,6 +222,21 @@ py_test( ], ) +py_test( + name = "sql_dataset_op_test", + size = "small", + srcs = ["sql_dataset_op_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + "//tensorflow/python:framework", + "//tensorflow/python:platform_test", + ], +) + py_test( name = "resample_test", size = "medium", diff --git a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py new file mode 100644 index 0000000000..e520bc05d6 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py @@ -0,0 +1,188 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for experimental sql input op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sqlite3 + +from tensorflow.contrib.data.python.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class SqlDatasetTest(test.TestCase): + + def setUp(self): + self.data_source_name = os.path.join(test.get_temp_dir(), "tftest.sqlite") + self.driver_name = array_ops.placeholder(dtypes.string, shape=[]) + self.query = array_ops.placeholder(dtypes.string, shape=[]) + self.output_types = (dtypes.string, dtypes.string, dtypes.string) + + conn = sqlite3.connect(self.data_source_name) + c = conn.cursor() + c.execute("DROP TABLE IF EXISTS students") + c.execute("DROP TABLE IF EXISTS people") + c.execute( + "CREATE TABLE IF NOT EXISTS students (id INTEGER NOT NULL PRIMARY KEY," + " first_name VARCHAR(100), last_name VARCHAR(100), motto VARCHAR(100))") + c.execute( + "INSERT INTO students (first_name, last_name, motto) VALUES ('John', " + "'Doe', 'Hi!'), ('Apple', 'Orange', 'Hi again!')") + c.execute( + "CREATE TABLE IF NOT EXISTS people (id INTEGER NOT NULL PRIMARY KEY, " + "first_name VARCHAR(100), last_name VARCHAR(100), state VARCHAR(100))") + c.execute( + "INSERT INTO people (first_name, last_name, state) VALUES ('Benjamin'," + " 'Franklin', 'Pennsylvania'), ('John', 'Doe', 'California')") + conn.commit() + conn.close() + + dataset = dataset_ops.SqlDataset(self.driver_name, self.data_source_name, + self.query, self.output_types).repeat(2) + iterator = dataset.make_initializable_iterator() + self.init_op = iterator.initializer + self.get_next = iterator.get_next() + + # Test that SqlDataset can read from a database table. + def testReadResultSet(self): + with self.test_session() as sess: + for _ in range(2): # Run twice to verify statelessness of db operations. + sess.run( + self.init_op, + feed_dict={ + self.driver_name: "sqlite", + self.query: "SELECT first_name, last_name, motto FROM students " + "ORDER BY first_name DESC" + }) + for _ in range(2): # Dataset is repeated. See setUp. + self.assertEqual((b"John", b"Doe", b"Hi!"), sess.run(self.get_next)) + self.assertEqual((b"Apple", b"Orange", b"Hi again!"), + sess.run(self.get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(self.get_next) + + # Test that SqlDataset works on a join query. + def testReadResultSetJoinQuery(self): + with self.test_session() as sess: + sess.run( + self.init_op, + feed_dict={ + self.driver_name: "sqlite", + self.query: + "SELECT students.first_name, state, motto FROM students " + "INNER JOIN people " + "ON students.first_name = people.first_name " + "AND students.last_name = people.last_name" + }) + for _ in range(2): + self.assertEqual((b"John", b"California", b"Hi!"), + sess.run(self.get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(self.get_next) + + # Test that an `OutOfRangeError` is raised on the first call to `get_next` + # if result set is empty. + def testReadEmptyResultSet(self): + with self.test_session() as sess: + sess.run( + self.init_op, + feed_dict={ + self.driver_name: "sqlite", + self.query: "SELECT first_name, last_name, motto FROM students " + "WHERE first_name = 'Nonexistent'" + }) + with self.assertRaises(errors.OutOfRangeError): + sess.run(self.get_next) + + # Test that an error is raised when `driver_name` is invalid. + def testReadResultSetWithInvalidDriverName(self): + with self.test_session() as sess: + with self.assertRaises(errors.InvalidArgumentError): + sess.run( + self.init_op, + feed_dict={ + self.driver_name: "sqlfake", + self.query: "SELECT first_name, last_name, motto FROM students " + "ORDER BY first_name DESC" + }) + + # Test that an error is raised when a column name in `query` is nonexistent + def testReadResultSetWithInvalidColumnName(self): + with self.test_session() as sess: + sess.run( + self.init_op, + feed_dict={ + self.driver_name: "sqlite", + self.query: + "SELECT first_name, last_name, fake_column FROM students " + "ORDER BY first_name DESC" + }) + with self.assertRaises(errors.UnknownError): + sess.run(self.get_next) + + # Test that an error is raised when there is a syntax error in `query`. + def testReadResultSetOfQueryWithSyntaxError(self): + with self.test_session() as sess: + sess.run( + self.init_op, + feed_dict={ + self.driver_name: "sqlite", + self.query: + "SELEmispellECT first_name, last_name, motto FROM students " + "ORDER BY first_name DESC" + }) + with self.assertRaises(errors.UnknownError): + sess.run(self.get_next) + + # Test that an error is raised when the number of columns in `query` + # does not match the length of `output_types`. + def testReadResultSetWithMismatchBetweenColumnsAndOutputTypes(self): + with self.test_session() as sess: + sess.run( + self.init_op, + feed_dict={ + self.driver_name: "sqlite", + self.query: "SELECT first_name, last_name FROM students " + "ORDER BY first_name DESC" + }) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(self.get_next) + + # Test that no results are returned when `query` is an insert query rather + # than a select query. In particular, the error refers to the number of + # output types passed to the op not matching the number of columns in the + # result set of the query (namely, 0 for an insert statement.) + def testReadResultSetOfInsertQuery(self): + with self.test_session() as sess: + sess.run( + self.init_op, + feed_dict={ + self.driver_name: "sqlite", + self.query: + "INSERT INTO students (first_name, last_name, motto) " + "VALUES ('Foo', 'Bar', 'Baz'), ('Fizz', 'Buzz', 'Fizzbuzz')" + }) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(self.get_next) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index aadcf77f3c..cb6062ceb2 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -2292,6 +2292,46 @@ class TextLineDataset(Dataset): return dtypes.string +class SqlDataset(Dataset): + """A `Dataset` consisting of the results from a SQL query.""" + + def __init__(self, driver_name, data_source_name, query, output_types): + """Creates a `SqlDataset`. + + Args: + driver_name: A 0-D `tf.string` tensor containing the database type. + Currently, the only supported value is 'sqlite'. + data_source_name: A 0-D `tf.string` tensor containing a connection string + to connect to the database. + query: A 0-D `tf.string` tensor containing the SQL query to execute. + output_types: A tuple of `tf.DType` objects representing the types of the + columns returned by `query`. + """ + super(SqlDataset, self).__init__() + self._driver_name = ops.convert_to_tensor( + driver_name, dtype=dtypes.string, name="driver_name") + self._data_source_name = ops.convert_to_tensor( + data_source_name, dtype=dtypes.string, name="data_source_name") + self._query = ops.convert_to_tensor( + query, dtype=dtypes.string, name="query") + self._output_types = output_types + + def make_dataset_resource(self): + return gen_dataset_ops.sql_dataset(self._driver_name, + self._data_source_name, self._query, + nest.flatten(self.output_types), + nest.flatten(self.output_shapes)) + + @property + def output_shapes(self): + return nest.map_structure(lambda _: tensor_shape.TensorShape([]), + self._output_types) + + @property + def output_types(self): + return self._output_types + + class TFRecordDataset(Dataset): """A `Dataset` comprising records from one or more TFRecord files.""" diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index bba4171f92..ae8109b1b9 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4649,6 +4649,7 @@ filegroup( "sample_distorted_bounding_box_op.*", "ctc_loss_op.*", "spectrogram_convert_test_data.cc", + "sql_dataset_ops.cc", # Excluded due to experimental status: "debug_ops.*", "scatter_nd_op*", @@ -5855,6 +5856,28 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "sql_dataset_ops", + srcs = [ + "sql/driver_manager.cc", + "sql/sqlite_query_connection.cc", + "sql_dataset_ops.cc", + ], + hdrs = [ + "sql/driver_manager.h", + "sql/query_connection.h", + "sql/sqlite_query_connection.h", + ], + deps = [ + ":dataset", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "@sqlite_archive//:sqlite", + ], +) + tf_kernel_library( name = "iterator_ops", srcs = ["iterator_ops.cc"], @@ -5905,6 +5928,7 @@ tf_kernel_library( ":shuffle_dataset_op", ":skip_dataset_op", ":sparse_tensor_slice_dataset_op", + ":sql_dataset_ops", ":take_dataset_op", ":tensor_dataset_op", ":tensor_slice_dataset_op", diff --git a/tensorflow/core/kernels/sql/driver_manager.cc b/tensorflow/core/kernels/sql/driver_manager.cc new file mode 100644 index 0000000000..9a5d5aa853 --- /dev/null +++ b/tensorflow/core/kernels/sql/driver_manager.cc @@ -0,0 +1,34 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/kernels/sql/driver_manager.h" +#include "tensorflow/core/kernels/sql/sqlite_query_connection.h" + +namespace tensorflow { + +namespace sql { + +std::unique_ptr DriverManager::CreateQueryConnection( + const string& driver_name) { + if (driver_name == "sqlite") { + return std::unique_ptr(new SqliteQueryConnection()); + } else { // TODO(b/64276826, b/64276995) Add support for other db types. + // Change to registry pattern. + return nullptr; + } +} + +} // namespace sql + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/sql/driver_manager.h b/tensorflow/core/kernels/sql/driver_manager.h new file mode 100644 index 0000000000..53350268d3 --- /dev/null +++ b/tensorflow/core/kernels/sql/driver_manager.h @@ -0,0 +1,41 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_DRIVER_MANAGER_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_DRIVER_MANAGER_H_ + +#include "tensorflow/core/kernels/sql/query_connection.h" + +namespace tensorflow { + +namespace sql { + +// A factory class for creating `QueryConnection` instances. +class DriverManager { + public: + // A factory method for creating `QueryConnection` instances. + // + // `driver_name` is the database type (e.g. 'sqlite'). `driver_name` + // corresponds to a `QueryConnection` subclass. For example, if `driver_name` + // == `sqlite`, then `CreateQueryConnection` will create a + // `SqliteQueryConnection` instance. + static std::unique_ptr CreateQueryConnection( + const string& driver_name); +}; + +} // namespace sql + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_DRIVER_MANAGER_H_ diff --git a/tensorflow/core/kernels/sql/query_connection.h b/tensorflow/core/kernels/sql/query_connection.h new file mode 100644 index 0000000000..f9945aee7d --- /dev/null +++ b/tensorflow/core/kernels/sql/query_connection.h @@ -0,0 +1,67 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_QUERY_CONNECTION_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_QUERY_CONNECTION_H_ + +#include "tensorflow/core/framework/tensor.h" + +namespace tensorflow { + +namespace sql { +// This interface allows a user to connect to a database, execute a query, and +// iterate over the result set, putting the results into an output tensor. +// A subclass implementation is required for each type of database +// (e.g. sqlite3, mysql, etc.) +// +// Presently, a `QueryConnection` instance can only handle one query at a time. +// In a future extension, this class may be refactored so that it creates +// instances of a new class (named, say, `Statement`) which could have a +// one-to-one correspondence with queries. This would make `QueryConnection` +// more consistent with `Connection` classes of other database APIs. +// `QueryConnection` would then be renamed simply `Connection`. +// +// This class is not thread safe. Access to it is guarded by a mutex in +// `SqlDatasetOp::Dataset::Iterator`. +class QueryConnection { + public: + virtual ~QueryConnection() {} + // Opens a connection to the database named by `data_source_name`. Prepares to + // execute `query` against the database. + // + // The client must call `Close()` to release the connection resources, even + // if `Open()` fails. `Close()` must be called before making another call + // to `Open()`. + virtual Status Open(const string& data_source_name, const string& query, + const DataTypeVector& output_types) = 0; + // Closes an opened connection. + virtual Status Close() = 0; + // Retrieves the next row of the result set of the query from the most recent + // call to `Open()`. + // + // If such a row exists, then the row will be stored in `*out_tensors`, and + // `false` will be stored in `*end_of_sequence`. + // + // If there are no more rows in the result set, then instead `true` will be + // stored in `*end_of_sequence`, and the content of `*out_tensors` will be + // undefined. + virtual Status GetNext(std::vector* out_tensors, + bool* end_of_sequence) = 0; +}; + +} // namespace sql + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_QUERY_CONNECTION_H_ diff --git a/tensorflow/core/kernels/sql/sqlite_query_connection.cc b/tensorflow/core/kernels/sql/sqlite_query_connection.cc new file mode 100644 index 0000000000..4bcf82ae28 --- /dev/null +++ b/tensorflow/core/kernels/sql/sqlite_query_connection.cc @@ -0,0 +1,121 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/kernels/sql/sqlite_query_connection.h" +#include "tensorflow/core/lib/strings/stringprintf.h" + +namespace tensorflow { + +namespace sql { + +// Returns a Status with the sqlite error message corresponding to the +// sqlite error number, `sqlite_err`. +static Status SqliteErrorToStatus(sqlite3* db, int sqlite_err) { + if (sqlite_err == SQLITE_OK) { + return Status::OK(); + } else { + const char* err_msg = sqlite3_errmsg(db); + // TODO(b/64276468) Be smart about the error code being returned + return errors::Unknown( + tensorflow::strings::Printf("Sqlite error: %s", err_msg)); + } +} + +SqliteQueryConnection::SqliteQueryConnection(){}; + +SqliteQueryConnection::~SqliteQueryConnection() { + Status s = Close(); + if (!s.ok()) { + LOG(WARNING) << "Failed to close query connection: " << s; + } +} + +Status SqliteQueryConnection::Open(const string& data_source_name, + const string& query, + const DataTypeVector& output_types) { + if (db_ != nullptr) { + return errors::FailedPrecondition( + "Failed to open query connection: Connection already opeend."); + } + int err = sqlite3_open(data_source_name.c_str(), &db_); + Status s = SqliteErrorToStatus(db_, err); + if (s.ok()) { + query_ = query; + output_types_ = output_types; + } + return s; +} + +Status SqliteQueryConnection::Close() { + int err = sqlite3_finalize(stmt_); + if (err != SQLITE_OK) { + return SqliteErrorToStatus(db_, err); + } + stmt_ = nullptr; + err = sqlite3_close(db_); + if (err != SQLITE_OK) { + return SqliteErrorToStatus(db_, err); + } + db_ = nullptr; + return Status::OK(); +} + +Status SqliteQueryConnection::GetNext(std::vector* out_tensors, + bool* end_of_sequence) { + if (stmt_ == nullptr) { + Status s = ExecuteQuery(); + if (!s.ok()) { + return s; + } + } + int rc = sqlite3_step(stmt_); + if (rc == SQLITE_ROW) { + for (int i = 0; i < column_count_; i++) { + // TODO(b/64276939) Support other tensorflow types. Interpret columns as + // the types that the client specifies. + Tensor tensor(cpu_allocator(), DT_STRING, {}); + string value( + reinterpret_cast(sqlite3_column_text(stmt_, i))); + tensor.scalar()() = value; + out_tensors->emplace_back(std::move(tensor)); + } + *end_of_sequence = false; + return Status::OK(); + } else if (rc == SQLITE_DONE) { + *end_of_sequence = true; + return Status::OK(); + } else { + return SqliteErrorToStatus(db_, rc); + } +} + +Status SqliteQueryConnection::ExecuteQuery() { + int err = sqlite3_prepare_v2(db_, query_.c_str(), -1, &stmt_, nullptr); + Status s = SqliteErrorToStatus(db_, err); + if (s.ok()) { + int column_count = sqlite3_column_count(stmt_); + if (column_count != output_types_.size()) { + return errors::InvalidArgument(tensorflow::strings::Printf( + "The number of columns in query (%d) must match the number of " + "elements in output_types (%zu).", + column_count, output_types_.size())); + } + column_count_ = column_count; + } + return s; +} + +} // namespace sql + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/sql/sqlite_query_connection.h b/tensorflow/core/kernels/sql/sqlite_query_connection.h new file mode 100644 index 0000000000..f93b203a5b --- /dev/null +++ b/tensorflow/core/kernels/sql/sqlite_query_connection.h @@ -0,0 +1,49 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_SQLITE_QUERY_CONNECTION_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_SQLITE_QUERY_CONNECTION_H_ + +#include "sqlite3.h" +#include "tensorflow/core/kernels/sql/query_connection.h" + +namespace tensorflow { + +namespace sql { + +class SqliteQueryConnection : public QueryConnection { + public: + SqliteQueryConnection(); + ~SqliteQueryConnection() override; + Status Open(const string& data_source_name, const string& query, + const DataTypeVector& output_types) override; + Status Close() override; + Status GetNext(std::vector* out_tensors, + bool* end_of_sequence) override; + + private: + // Executes the query string `query_`. + Status ExecuteQuery(); + sqlite3* db_ = nullptr; + sqlite3_stmt* stmt_ = nullptr; + int column_count_ = 0; + string query_; + DataTypeVector output_types_; +}; + +} // namespace sql + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SQL_SQLITE_QUERY_CONNECTION_H_ diff --git a/tensorflow/core/kernels/sql_dataset_ops.cc b/tensorflow/core/kernels/sql_dataset_ops.cc new file mode 100644 index 0000000000..d17ae53cb4 --- /dev/null +++ b/tensorflow/core/kernels/sql_dataset_ops.cc @@ -0,0 +1,154 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/dataset.h" +#include "tensorflow/core/kernels/sql/driver_manager.h" +#include "tensorflow/core/kernels/sql/query_connection.h" +#include "tensorflow/core/lib/io/inputbuffer.h" +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/lib/strings/stringprintf.h" + +namespace tensorflow { + +namespace { +// See documentation in ../ops/dataset_ops.cc for a high-level +// description of the following ops. + +class SqlDatasetOp : public DatasetOpKernel { + public: + explicit SqlDatasetOp(OpKernelConstruction* ctx) : DatasetOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); + } + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + string driver_name; + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "driver_name", &driver_name)); + + string data_source_name; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "data_source_name", + &data_source_name)); + + string query; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "query", &query)); + + // TODO(b/64276826) Change this check when we add support for other + // databases. + OP_REQUIRES(ctx, driver_name == "sqlite", + errors::InvalidArgument(tensorflow::strings::Printf( + "The database type, %s, is not supported by SqlDataset. " + "The set of supported databases is: {'sqlite'}.", + driver_name.c_str()))); + // TODO(b/64276939) Remove this check when we add support for other + // tensorflow types. + for (const DataType& dt : output_types_) { + OP_REQUIRES(ctx, dt == DataType::DT_STRING, + errors::InvalidArgument( + "Each element of `output_types_` must be DT_STRING.")); + } + for (const PartialTensorShape& pts : output_shapes_) { + OP_REQUIRES(ctx, pts.dims() == 0, + errors::InvalidArgument( + "Each element of `output_shapes_` must be a scalar.")); + } + + *output = new Dataset(driver_name, data_source_name, query, output_types_, + output_shapes_); + } + + private: + class Dataset : public DatasetBase { + public: + Dataset(const string& driver_name, const string& data_source_name, + const string& query, const DataTypeVector& output_types, + const std::vector& output_shapes) + : driver_name_(driver_name), + data_source_name_(data_source_name), + query_(query), + output_types_(output_types), + output_shapes_(output_shapes) {} + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::Sql")})); + } + + const DataTypeVector& output_dtypes() const override { + return output_types_; + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() override { return "SqlDatasetOp::Dataset"; } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params) {} + ~Iterator() override { + if (query_connection_initialized_) { + Status s = query_connection_->Close(); + if (!s.ok()) { + LOG(WARNING) << "Failed to close query connection: " << s; + } + } + } + + Status GetNextInternal(IteratorContext* /*ctx*/, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + if (!query_connection_initialized_) { + query_connection_initialized_ = true; + query_connection_ = sql::DriverManager::CreateQueryConnection( + dataset()->driver_name_); + Status s = query_connection_->Open(dataset()->data_source_name_, + dataset()->query_, + dataset()->output_types_); + if (!s.ok()) { + LOG(WARNING) << "Failed to connect to database: " << s; + return s; + } + } + return query_connection_->GetNext(out_tensors, end_of_sequence); + } + + private: + mutex mu_; + std::unique_ptr query_connection_ GUARDED_BY(mu_); + bool query_connection_initialized_ GUARDED_BY(mu_) = false; + }; + const string driver_name_; + const string data_source_name_; + const string query_; + const DataTypeVector output_types_; + const std::vector output_shapes_; + }; + DataTypeVector output_types_; + std::vector output_shapes_; +}; + +REGISTER_KERNEL_BUILDER(Name("SqlDataset").Device(DEVICE_CPU), SqlDatasetOp); + +} // namespace + +} // namespace tensorflow diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index f10e905361..34b9a2119a 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -420,6 +420,22 @@ compression_type: A scalar containing either (i) the empty string (no buffer_size: A scalar containing the number of bytes to buffer. )doc"); +REGISTER_OP("SqlDataset") + .Input("driver_name: string") + .Input("data_source_name: string") + .Input("query: string") + .Output("handle: resource") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset that executes a SQL query and emits rows of the result set. + +driver_name: The database type. Currently, the only supported type is 'sqlite'. +data_source_name: A connection string to connect to the database. +query: A SQL query to execute. +)doc"); + REGISTER_OP("FixedLengthRecordDataset") .Input("filenames: string") .Input("header_bytes: int64") diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 0d171760a5..2c7acd809a 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -296,6 +296,14 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = str(Label("//third_party:png.BUILD")), ) + native.new_http_archive( + name = "sqlite_archive", + urls = ["http://www.sqlite.org/2017/sqlite-amalgamation-3200000.zip"], + sha256 = "208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4", + strip_prefix = "sqlite-amalgamation-3200000", + build_file = str(Label("//third_party:sqlite.BUILD")) + ) + native.new_http_archive( name = "gif_archive", urls = [ diff --git a/third_party/sqlite.BUILD b/third_party/sqlite.BUILD new file mode 100644 index 0000000000..f593b71542 --- /dev/null +++ b/third_party/sqlite.BUILD @@ -0,0 +1,16 @@ +# Description: +# Sqlite3 library. Provides utilities for interacting +# with sqlite3 databases. + +licenses(["notice"]) # BSD/MIT-like license + +exports_files(["LICENSE"]) + +cc_library( + name = "sqlite", + srcs = ["sqlite3.c"], + hdrs = ["sqlite3.h"], + includes = ["."], + linkopts = ["-lm"], + visibility = ["//visibility:public"], +) -- GitLab From 164b2ab29e84d512c1874fbc44e369c93835a352 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 25 Aug 2017 17:07:04 -0700 Subject: [PATCH 007/294] Update tf.contrib.keras with the latest changes from version 2.0.8. PiperOrigin-RevId: 166545077 --- tensorflow/contrib/keras/BUILD | 4 +- .../contrib/keras/python/keras/__init__.py | 2 +- .../keras/applications/imagenet_utils.py | 90 +- .../keras/applications/imagenet_utils_test.py | 50 +- .../python/keras/applications/inception_v3.py | 5 +- .../python/keras/applications/mobilenet.py | 20 +- .../keras/applications/mobilenet_test.py | 38 + .../python/keras/applications/resnet50.py | 10 +- .../keras/python/keras/applications/vgg16.py | 5 +- .../keras/python/keras/applications/vgg19.py | 5 +- .../python/keras/applications/xception.py | 5 +- .../contrib/keras/python/keras/backend.py | 219 +++-- .../keras/python/keras/backend_test.py | 35 +- .../contrib/keras/python/keras/callbacks.py | 23 +- .../keras/python/keras/callbacks_test.py | 222 ++++- .../keras/python/keras/engine/topology.py | 100 +- .../python/keras/engine/topology_test.py | 48 + .../keras/python/keras/engine/training.py | 905 +++++++++++------- .../python/keras/engine/training_test.py | 440 ++++++++- .../python/keras/layers/convolutional.py | 12 +- .../keras/layers/convolutional_recurrent.py | 45 +- .../layers/convolutional_recurrent_test.py | 21 +- .../contrib/keras/python/keras/layers/core.py | 9 +- .../keras/python/keras/layers/lstm_test.py | 27 + .../keras/python/keras/layers/merge.py | 59 ++ .../keras/python/keras/layers/merge_test.py | 14 + .../keras/python/keras/layers/recurrent.py | 6 + .../keras/python/keras/layers/wrappers.py | 25 +- .../python/keras/layers/wrappers_test.py | 39 +- .../contrib/keras/python/keras/losses.py | 6 +- .../contrib/keras/python/keras/models.py | 391 ++++++-- .../contrib/keras/python/keras/models_test.py | 99 ++ .../contrib/keras/python/keras/optimizers.py | 234 +++-- .../keras/python/keras/optimizers_test.py | 31 +- .../keras/python/keras/preprocessing/image.py | 8 +- .../python/keras/preprocessing/image_test.py | 12 +- .../python/keras/preprocessing/sequence.py | 7 +- .../keras/python/keras/preprocessing/text.py | 8 +- .../python/keras/preprocessing/text_test.py | 7 + .../keras/python/keras/utils/data_utils.py | 14 +- .../keras/python/keras/utils/generic_utils.py | 7 +- .../keras/python/keras/utils/io_utils.py | 2 +- tensorflow/tools/ci_build/ci_sanity.sh | 3 +- 43 files changed, 2501 insertions(+), 811 deletions(-) diff --git a/tensorflow/contrib/keras/BUILD b/tensorflow/contrib/keras/BUILD index 7a562f727e..a09045d7fd 100644 --- a/tensorflow/contrib/keras/BUILD +++ b/tensorflow/contrib/keras/BUILD @@ -263,7 +263,7 @@ py_test( py_test( name = "mobilenet_test", - size = "small", + size = "medium", srcs = ["python/keras/applications/mobilenet_test.py"], srcs_version = "PY2AND3", deps = [ @@ -608,7 +608,7 @@ py_test( py_test( name = "callbacks_test", - size = "small", + size = "medium", srcs = ["python/keras/callbacks_test.py"], srcs_version = "PY2AND3", tags = ["notsan"], diff --git a/tensorflow/contrib/keras/python/keras/__init__.py b/tensorflow/contrib/keras/python/keras/__init__.py index 19380bc8c5..a3edb29170 100644 --- a/tensorflow/contrib/keras/python/keras/__init__.py +++ b/tensorflow/contrib/keras/python/keras/__init__.py @@ -37,4 +37,4 @@ from tensorflow.contrib.keras.python.keras import utils from tensorflow.contrib.keras.python.keras import wrappers from tensorflow.contrib.keras.python.keras.layers import Input -__version__ = '2.0.6-tf' +__version__ = '2.0.8-tf' diff --git a/tensorflow/contrib/keras/python/keras/applications/imagenet_utils.py b/tensorflow/contrib/keras/python/keras/applications/imagenet_utils.py index a64021ae49..ce287dbd66 100644 --- a/tensorflow/contrib/keras/python/keras/applications/imagenet_utils.py +++ b/tensorflow/contrib/keras/python/keras/applications/imagenet_utils.py @@ -22,6 +22,7 @@ import json from tensorflow.contrib.keras.python.keras import backend as K from tensorflow.contrib.keras.python.keras.utils.data_utils import get_file +from tensorflow.python.platform import tf_logging as logging CLASS_INDEX = None @@ -43,19 +44,25 @@ def preprocess_input(x, data_format=None): assert data_format in {'channels_last', 'channels_first'} if data_format == 'channels_first': - # 'RGB'->'BGR' - x = x[:, ::-1, :, :] - # Zero-center by mean pixel - x[:, 0, :, :] -= 103.939 - x[:, 1, :, :] -= 116.779 - x[:, 2, :, :] -= 123.68 + if x.ndim == 3: + # 'RGB'->'BGR' + x = x[::-1, ...] + # Zero-center by mean pixel + x[0, :, :] -= 103.939 + x[1, :, :] -= 116.779 + x[2, :, :] -= 123.68 + else: + x = x[:, ::-1, ...] + x[:, 0, :, :] -= 103.939 + x[:, 1, :, :] -= 116.779 + x[:, 2, :, :] -= 123.68 else: # 'RGB'->'BGR' - x = x[:, :, :, ::-1] + x = x[..., ::-1] # Zero-center by mean pixel - x[:, :, :, 0] -= 103.939 - x[:, :, :, 1] -= 116.779 - x[:, :, :, 2] -= 123.68 + x[..., 0] -= 103.939 + x[..., 1] -= 116.779 + x[..., 2] -= 123.68 return x @@ -94,8 +101,12 @@ def decode_predictions(preds, top=5): return results -def _obtain_input_shape(input_shape, default_size, min_size, data_format, - include_top): +def _obtain_input_shape(input_shape, + default_size, + min_size, + data_format, + require_flatten, + weights=None): """Internal utility to compute/validate an ImageNet model's input shape. Arguments: @@ -104,8 +115,11 @@ def _obtain_input_shape(input_shape, default_size, min_size, data_format, default_size: default input width/height for the model. min_size: minimum input width/height accepted by the model. data_format: image data format to use. - include_top: whether the model is expected to + require_flatten: whether the model is expected to be linked to a classifier via a Flatten layer. + weights: one of `None` (random initialization) + or 'imagenet' (pre-training on ImageNet). + If weights='imagenet' input channels must be equal to 3. Returns: An integer shape tuple (may include None entries). @@ -113,43 +127,67 @@ def _obtain_input_shape(input_shape, default_size, min_size, data_format, Raises: ValueError: in case of invalid argument values. """ - if data_format == 'channels_first': - default_shape = (3, default_size, default_size) + if weights != 'imagenet' and input_shape and len(input_shape) == 3: + if data_format == 'channels_first': + if input_shape[0] not in {1, 3}: + logging.warning('This model usually expects 1 or 3 input channels. ' + 'However, it was passed an input_shape with ' + + str(input_shape[0]) + ' input channels.') + default_shape = (input_shape[0], default_size, default_size) + else: + if input_shape[-1] not in {1, 3}: + logging.warning('This model usually expects 1 or 3 input channels. ' + 'However, it was passed an input_shape with ' + + str(input_shape[-1]) + ' input channels.') + default_shape = (default_size, default_size, input_shape[-1]) else: - default_shape = (default_size, default_size, 3) - if include_top: + if data_format == 'channels_first': + default_shape = (3, default_size, default_size) + else: + default_shape = (default_size, default_size, 3) + if weights == 'imagenet' and require_flatten: if input_shape is not None: if input_shape != default_shape: - raise ValueError('When setting`include_top=True`, ' + raise ValueError('When setting`include_top=True` ' + 'and loading `imagenet` weights, ' '`input_shape` should be ' + str(default_shape) + '.') - input_shape = default_shape - else: + return default_shape + if input_shape: if data_format == 'channels_first': if input_shape is not None: if len(input_shape) != 3: raise ValueError('`input_shape` must be a tuple of three integers.') - if input_shape[0] != 3: + if input_shape[0] != 3 and weights == 'imagenet': raise ValueError('The input must have 3 channels; got ' '`input_shape=' + str(input_shape) + '`') if ((input_shape[1] is not None and input_shape[1] < min_size) or (input_shape[2] is not None and input_shape[2] < min_size)): raise ValueError('Input size must be at least ' + str(min_size) + 'x' - + str(min_size) + ', got ' + + str(min_size) + '; got ' '`input_shape=' + str(input_shape) + '`') - else: - input_shape = (3, None, None) else: if input_shape is not None: if len(input_shape) != 3: raise ValueError('`input_shape` must be a tuple of three integers.') - if input_shape[-1] != 3: + if input_shape[-1] != 3 and weights == 'imagenet': raise ValueError('The input must have 3 channels; got ' '`input_shape=' + str(input_shape) + '`') if ((input_shape[0] is not None and input_shape[0] < min_size) or (input_shape[1] is not None and input_shape[1] < min_size)): raise ValueError('Input size must be at least ' + str(min_size) + 'x' - + str(min_size) + ', got ' + + str(min_size) + '; got ' '`input_shape=' + str(input_shape) + '`') + else: + if require_flatten: + input_shape = default_shape + else: + if data_format == 'channels_first': + input_shape = (3, None, None) else: input_shape = (None, None, 3) + if require_flatten: + if None in input_shape: + raise ValueError('If `include_top` is True, ' + 'you should specify a static `input_shape`. ' + 'Got `input_shape=' + str(input_shape) + '`') return input_shape diff --git a/tensorflow/contrib/keras/python/keras/applications/imagenet_utils_test.py b/tensorflow/contrib/keras/python/keras/applications/imagenet_utils_test.py index 378c06d30d..fa0b9ec299 100644 --- a/tensorflow/contrib/keras/python/keras/applications/imagenet_utils_test.py +++ b/tensorflow/contrib/keras/python/keras/applications/imagenet_utils_test.py @@ -27,16 +27,26 @@ from tensorflow.python.platform import test class ImageNetUtilsTest(test.TestCase): def test_preprocess_input(self): - x = np.random.uniform(0, 255, (2, 3, 2, 3)) + # Test batch of images + x = np.random.uniform(0, 255, (2, 10, 10, 3)) self.assertEqual( keras.applications.imagenet_utils.preprocess_input(x).shape, x.shape) - out1 = keras.applications.imagenet_utils.preprocess_input( x, 'channels_last') out2 = keras.applications.imagenet_utils.preprocess_input( np.transpose(x, (0, 3, 1, 2)), 'channels_first') self.assertAllClose(out1, out2.transpose(0, 2, 3, 1)) + # Test single image + x = np.random.uniform(0, 255, (10, 10, 3)) + self.assertEqual( + keras.applications.imagenet_utils.preprocess_input(x).shape, x.shape) + out1 = keras.applications.imagenet_utils.preprocess_input( + x, 'channels_last') + out2 = keras.applications.imagenet_utils.preprocess_input( + np.transpose(x, (2, 0, 1)), 'channels_first') + self.assertAllClose(out1, out2.transpose(1, 2, 0)) + def test_obtain_input_shape(self): # input_shape and default_size are not identical. with self.assertRaises(ValueError): @@ -45,7 +55,8 @@ class ImageNetUtilsTest(test.TestCase): default_size=299, min_size=139, data_format='channels_last', - include_top=True) + require_flatten=True, + weights='imagenet') # Test invalid use cases for data_format in ['channels_last', 'channels_first']: @@ -61,7 +72,7 @@ class ImageNetUtilsTest(test.TestCase): default_size=None, min_size=139, data_format=data_format, - include_top=False) + require_flatten=False) # shape is 1D. shape = (100,) @@ -75,7 +86,7 @@ class ImageNetUtilsTest(test.TestCase): default_size=None, min_size=139, data_format=data_format, - include_top=False) + require_flatten=False) # the number of channels is 5 not 3. shape = (100, 100) @@ -89,43 +100,60 @@ class ImageNetUtilsTest(test.TestCase): default_size=None, min_size=139, data_format=data_format, - include_top=False) + require_flatten=False) + + # require_flatten=True with dynamic input shape. + with self.assertRaises(ValueError): + keras.applications.imagenet_utils._obtain_input_shape( + input_shape=None, + default_size=None, + min_size=139, + data_format='channels_first', + require_flatten=True) + + assert keras.applications.imagenet_utils._obtain_input_shape( + input_shape=(3, 200, 200), + default_size=None, + min_size=139, + data_format='channels_first', + require_flatten=True) == (3, 200, 200) assert keras.applications.imagenet_utils._obtain_input_shape( input_shape=None, default_size=None, min_size=139, data_format='channels_last', - include_top=False) == (None, None, 3) + require_flatten=False) == (None, None, 3) assert keras.applications.imagenet_utils._obtain_input_shape( input_shape=None, default_size=None, min_size=139, data_format='channels_first', - include_top=False) == (3, None, None) + require_flatten=False) == (3, None, None) assert keras.applications.imagenet_utils._obtain_input_shape( input_shape=None, default_size=None, min_size=139, data_format='channels_last', - include_top=False) == (None, None, 3) + require_flatten=False) == (None, None, 3) assert keras.applications.imagenet_utils._obtain_input_shape( input_shape=(150, 150, 3), default_size=None, min_size=139, data_format='channels_last', - include_top=False) == (150, 150, 3) + require_flatten=False) == (150, 150, 3) assert keras.applications.imagenet_utils._obtain_input_shape( input_shape=(3, None, None), default_size=None, min_size=139, data_format='channels_first', - include_top=False) == (3, None, None) + require_flatten=False) == (3, None, None) if __name__ == '__main__': test.main() + diff --git a/tensorflow/contrib/keras/python/keras/applications/inception_v3.py b/tensorflow/contrib/keras/python/keras/applications/inception_v3.py index f77e4a8341..2fdc62f2f2 100644 --- a/tensorflow/contrib/keras/python/keras/applications/inception_v3.py +++ b/tensorflow/contrib/keras/python/keras/applications/inception_v3.py @@ -125,7 +125,7 @@ def InceptionV3(include_top=True, if `include_top` is False (otherwise the input shape has to be `(299, 299, 3)` (with `channels_last` data format) or `(3, 299, 299)` (with `channels_first` data format). - It should have exactly 3 inputs channels, + It should have exactly 3 input channels, and width and height should be no smaller than 139. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction @@ -165,7 +165,8 @@ def InceptionV3(include_top=True, default_size=299, min_size=139, data_format=K.image_data_format(), - include_top=include_top) + require_flatten=False, + weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) diff --git a/tensorflow/contrib/keras/python/keras/applications/mobilenet.py b/tensorflow/contrib/keras/python/keras/applications/mobilenet.py index 37240234d3..2a93486401 100644 --- a/tensorflow/contrib/keras/python/keras/applications/mobilenet.py +++ b/tensorflow/contrib/keras/python/keras/applications/mobilenet.py @@ -327,7 +327,7 @@ def MobileNet(input_shape=None, # pylint: disable=invalid-name if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or (3, 224, 224) (with `channels_first` data format). - It should have exactly 3 inputs channels, + It should have exactly 3 input channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. alpha: controls the width of the network. @@ -388,12 +388,26 @@ def MobileNet(input_shape=None, # pylint: disable=invalid-name 'as true, `classes` should be 1000') # Determine proper input shape. + if input_shape is None: + default_size = 224 + else: + if K.image_data_format() == 'channels_first': + rows = input_shape[1] + cols = input_shape[2] + else: + rows = input_shape[0] + cols = input_shape[1] + if rows == cols and rows in [128, 160, 192, 224]: + default_size = rows + else: + default_size = 224 input_shape = _obtain_input_shape( input_shape, - default_size=224, + default_size=default_size, min_size=32, data_format=K.image_data_format(), - include_top=include_top or weights) + require_flatten=include_top, + weights=weights) if K.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: diff --git a/tensorflow/contrib/keras/python/keras/applications/mobilenet_test.py b/tensorflow/contrib/keras/python/keras/applications/mobilenet_test.py index d67964c02b..19e69c764a 100644 --- a/tensorflow/contrib/keras/python/keras/applications/mobilenet_test.py +++ b/tensorflow/contrib/keras/python/keras/applications/mobilenet_test.py @@ -59,5 +59,43 @@ class MobileNetTest(test.TestCase): self.assertEqual(model.output_shape, (None, 1000)) keras.backend.set_image_data_format('channels_last') + def test_mobilenet_variable_input_channels(self): + input_shape = (None, None, 1) + model = keras.applications.MobileNet(weights=None, + include_top=False, + input_shape=input_shape) + self.assertEqual(model.output_shape, (None, None, None, 1024)) + + input_shape = (None, None, 4) + model = keras.applications.MobileNet(weights=None, + include_top=False, + input_shape=input_shape) + self.assertEqual(model.output_shape, (None, None, None, 1024)) + + def test_mobilenet_image_size(self): + with self.test_session(): + valid_image_sizes = [128, 160, 192, 224] + for size in valid_image_sizes: + keras.backend.set_image_data_format('channels_last') + input_shape = (size, size, 3) + model = keras.applications.MobileNet(input_shape=input_shape, + weights=None, + include_top=True) + self.assertEqual(model.input_shape, (None,) + input_shape) + + keras.backend.set_image_data_format('channels_first') + input_shape = (3, size, size) + model = keras.applications.MobileNet(input_shape=input_shape, + weights=None, + include_top=True) + self.assertEqual(model.input_shape, (None,) + input_shape) + + keras.backend.set_image_data_format('channels_last') + invalid_image_shape = (112, 112, 3) + with self.assertRaises(ValueError): + model = keras.applications.MobileNet(input_shape=invalid_image_shape, + weights='imagenet', + include_top=True) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/keras/python/keras/applications/resnet50.py b/tensorflow/contrib/keras/python/keras/applications/resnet50.py index 0de13c9592..794e05e2dc 100644 --- a/tensorflow/contrib/keras/python/keras/applications/resnet50.py +++ b/tensorflow/contrib/keras/python/keras/applications/resnet50.py @@ -42,7 +42,6 @@ from tensorflow.contrib.keras.python.keras.layers import GlobalAveragePooling2D from tensorflow.contrib.keras.python.keras.layers import GlobalMaxPooling2D from tensorflow.contrib.keras.python.keras.layers import Input from tensorflow.contrib.keras.python.keras.layers import MaxPooling2D -from tensorflow.contrib.keras.python.keras.layers import ZeroPadding2D from tensorflow.contrib.keras.python.keras.models import Model from tensorflow.contrib.keras.python.keras.utils.data_utils import get_file @@ -170,7 +169,7 @@ def ResNet50(include_top=True, if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). - It should have exactly 3 inputs channels, + It should have exactly 3 input channels, and width and height should be no smaller than 197. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction @@ -210,7 +209,8 @@ def ResNet50(include_top=True, default_size=224, min_size=197, data_format=K.image_data_format(), - include_top=include_top) + require_flatten=include_top, + weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) @@ -222,8 +222,8 @@ def ResNet50(include_top=True, else: bn_axis = 1 - x = ZeroPadding2D((3, 3))(img_input) - x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) + x = Conv2D(64, (7, 7), + strides=(2, 2), padding='same', name='conv1')(img_input) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) diff --git a/tensorflow/contrib/keras/python/keras/applications/vgg16.py b/tensorflow/contrib/keras/python/keras/applications/vgg16.py index 89bbb040e6..c38ae2a984 100644 --- a/tensorflow/contrib/keras/python/keras/applications/vgg16.py +++ b/tensorflow/contrib/keras/python/keras/applications/vgg16.py @@ -76,7 +76,7 @@ def VGG16(include_top=True, if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). - It should have exactly 3 inputs channels, + It should have exactly 3 input channels, and width and height should be no smaller than 48. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction @@ -115,7 +115,8 @@ def VGG16(include_top=True, default_size=224, min_size=48, data_format=K.image_data_format(), - include_top=include_top) + require_flatten=include_top, + weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) diff --git a/tensorflow/contrib/keras/python/keras/applications/vgg19.py b/tensorflow/contrib/keras/python/keras/applications/vgg19.py index 522a516ecf..ee67efaa92 100644 --- a/tensorflow/contrib/keras/python/keras/applications/vgg19.py +++ b/tensorflow/contrib/keras/python/keras/applications/vgg19.py @@ -76,7 +76,7 @@ def VGG19(include_top=True, if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). - It should have exactly 3 inputs channels, + It should have exactly 3 input channels, and width and height should be no smaller than 48. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction @@ -115,7 +115,8 @@ def VGG19(include_top=True, default_size=224, min_size=48, data_format=K.image_data_format(), - include_top=include_top) + require_flatten=include_top, + weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) diff --git a/tensorflow/contrib/keras/python/keras/applications/xception.py b/tensorflow/contrib/keras/python/keras/applications/xception.py index 49fb6008f6..7db7e9a9d6 100644 --- a/tensorflow/contrib/keras/python/keras/applications/xception.py +++ b/tensorflow/contrib/keras/python/keras/applications/xception.py @@ -86,7 +86,7 @@ def Xception(include_top=True, input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(299, 299, 3)`. - It should have exactly 3 inputs channels, + It should have exactly 3 input channels, and width and height should be no smaller than 71. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction @@ -147,7 +147,8 @@ def Xception(include_top=True, default_size=299, min_size=71, data_format=K.image_data_format(), - include_top=include_top) + require_flatten=False, + weights=weights) if input_tensor is None: img_input = Input(shape=input_shape) diff --git a/tensorflow/contrib/keras/python/keras/backend.py b/tensorflow/contrib/keras/python/keras/backend.py index 99570797af..76704d5d3d 100644 --- a/tensorflow/contrib/keras/python/keras/backend.py +++ b/tensorflow/contrib/keras/python/keras/backend.py @@ -266,7 +266,7 @@ def get_uid(prefix=''): graph = ops.get_default_graph() if graph not in tf_base_layers.PER_GRAPH_LAYER_NAME_UIDS: tf_base_layers.PER_GRAPH_LAYER_NAME_UIDS[graph] = collections.defaultdict( - int) + int) layer_name_uids = tf_base_layers.PER_GRAPH_LAYER_NAME_UIDS[graph] layer_name_uids[prefix] += 1 return layer_name_uids[prefix] @@ -490,13 +490,15 @@ def to_dense(tensor): name_scope = ops.name_scope -def variable(value, dtype=None, name=None): +def variable(value, dtype=None, name=None, constraint=None): """Instantiates a variable and returns it. Arguments: value: Numpy array, initial value of the tensor. dtype: Tensor type. name: Optional name string for the tensor. + constraint: Optional projection function to be + applied to the variable after an optimizer update. Returns: A variable instance (with Keras metadata included). @@ -523,10 +525,18 @@ def variable(value, dtype=None, name=None): sparse_coo.col, 1)), 1) v = sparse_tensor.SparseTensor( indices=indices, values=sparse_coo.data, dense_shape=sparse_coo.shape) + v._keras_shape = sparse_coo.shape v._uses_learning_phase = False return v v = variables_module.Variable( - value, dtype=_convert_string_dtype(dtype), name=name) + value, + dtype=_convert_string_dtype(dtype), + name=name, + constraint=constraint) + if isinstance(value, np.ndarray): + v._keras_shape = value.shape + elif hasattr(value, 'get_shape'): + v._keras_shape = int_shape(value) v._uses_learning_phase = False return v @@ -562,6 +572,57 @@ def constant(value, dtype=None, shape=None, name=None): return constant_op.constant(value, dtype=dtype, shape=shape, name=name) +def is_keras_tensor(x): + """Returns whether `x` is a Keras tensor. + + A "Keras tensor" is a tensor that was returned by a Keras layer, + (`Layer` class) or by `Input`. + + Arguments: + x: A candidate tensor. + + Returns: + A boolean: Whether the argument is a Keras tensor. + + Raises: + ValueError: In case `x` is not a symbolic tensor. + + Examples: + ```python + >>> from keras import backend as K + >>> from keras.layers import Input, Dense + >>> np_var = numpy.array([1, 2]) + >>> K.is_keras_tensor(np_var) # A numpy array is not a symbolic tensor. + ValueError + >>> k_var = tf.placeholder('float32', shape=(1,1)) + >>> K.is_keras_tensor(k_var) # A variable indirectly created outside of + keras is not a Keras tensor. + False + >>> keras_var = K.variable(np_var) + >>> K.is_keras_tensor(keras_var) # A variable created with the keras + backend is not a Keras tensor. + False + >>> keras_placeholder = K.placeholder(shape=(2, 4, 5)) + >>> K.is_keras_tensor(keras_placeholder) # A placeholder is not a Keras + tensor. + False + >>> keras_input = Input([10]) + >>> K.is_keras_tensor(keras_input) # An Input is a Keras tensor. + True + >>> keras_layer_output = Dense(10)(keras_input) + >>> K.is_keras_tensor(keras_layer_output) # Any Keras layer output is a + Keras tensor. + True + ``` + """ + if not isinstance(x, (ops.Tensor, + variables_module.Variable, + sparse_tensor.SparseTensor)): + raise ValueError('Unexpectedly found an instance of type `' + str(type(x)) + + '`. Expected a symbolic tensor instance.') + return hasattr(x, '_keras_history') + + def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None): """Instantiates a placeholder tensor and returns it. @@ -599,6 +660,21 @@ def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None): return x +def is_placeholder(x): + """Returns whether `x` is a placeholder. + + Arguments: + x: A candidate placeholder. + + Returns: + Boolean. + """ + try: + return x.op.type == 'Placeholder' + except AttributeError: + return False + + def shape(x): """Returns the symbolic shape of a tensor or variable. @@ -609,7 +685,8 @@ def shape(x): A symbolic shape (which is itself a tensor). Examples: - ``` + + ```python # TensorFlow example >>> from keras import backend as K >>> tf_session = K.get_session() @@ -651,9 +728,8 @@ def int_shape(x): (2, 2) ``` """ - shape = x.get_shape() try: - return tuple(shape.as_list()) + return tuple(x.get_shape().as_list()) except ValueError: return None @@ -759,7 +835,6 @@ def zeros(shape, dtype=None, name=None): """ if dtype is None: dtype = floatx() - shape = tuple(map(int, shape)) tf_dtype = _convert_string_dtype(dtype) return variable( init_ops.constant_initializer(0., dtype=tf_dtype)(shape), dtype, name) @@ -788,7 +863,6 @@ def ones(shape, dtype=None, name=None): """ if dtype is None: dtype = floatx() - shape = tuple(map(int, shape)) tf_dtype = _convert_string_dtype(dtype) return variable( init_ops.constant_initializer(1., dtype=tf_dtype)(shape), dtype, name) @@ -908,7 +982,6 @@ def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None): """ if dtype is None: dtype = floatx() - shape = tuple(map(int, shape)) tf_dtype = _convert_string_dtype(dtype) if seed is None: # ensure that randomness is conditioned by the Numpy RNG @@ -946,7 +1019,6 @@ def random_normal_variable(shape, mean, scale, dtype=None, name=None, """ if dtype is None: dtype = floatx() - shape = tuple(map(int, shape)) tf_dtype = _convert_string_dtype(dtype) if seed is None: # ensure that randomness is conditioned by the Numpy RNG @@ -1276,28 +1348,6 @@ def gather(reference, indices): # ELEMENT-WISE OPERATIONS -def _normalize_axis(axis, ndim): - """Converts negative axes to positive values. - - Arguments: - axis: Integer axis (possibly negative). - ndim: Rank of the tensor considered. - - Returns: - Positive integer axis. - """ - if isinstance(axis, tuple): - axis = list(axis) - if isinstance(axis, list): - for i, a in enumerate(axis): - if a is not None and a < 0: - axis[i] = a % ndim - else: - if axis is not None and axis < 0: - axis %= ndim - return axis - - def max(x, axis=None, keepdims=False): """Maximum value in a tensor. @@ -1312,7 +1362,6 @@ def max(x, axis=None, keepdims=False): Returns: A tensor with maximum values of `x`. """ - axis = _normalize_axis(axis, ndim(x)) return math_ops.reduce_max(x, axis=axis, keep_dims=keepdims) @@ -1330,7 +1379,6 @@ def min(x, axis=None, keepdims=False): Returns: A tensor with miminum values of `x`. """ - axis = _normalize_axis(axis, ndim(x)) return math_ops.reduce_min(x, axis=axis, keep_dims=keepdims) @@ -1348,7 +1396,6 @@ def sum(x, axis=None, keepdims=False): Returns: A tensor with sum of `x`. """ - axis = _normalize_axis(axis, ndim(x)) return math_ops.reduce_sum(x, axis=axis, keep_dims=keepdims) @@ -1366,7 +1413,6 @@ def prod(x, axis=None, keepdims=False): Returns: A tensor with the product of elements of `x`. """ - axis = _normalize_axis(axis, ndim(x)) return math_ops.reduce_prod(x, axis=axis, keep_dims=keepdims) @@ -1380,7 +1426,6 @@ def cumsum(x, axis=0): Returns: A tensor of the cumulative sum of values of `x` along `axis`. """ - axis = _normalize_axis(axis, ndim(x)) return math_ops.cumsum(x, axis=axis) @@ -1394,7 +1439,6 @@ def cumprod(x, axis=0): Returns: A tensor of the cumulative product of values of `x` along `axis`. """ - axis = _normalize_axis(axis, ndim(x)) return math_ops.cumprod(x, axis=axis) @@ -1412,7 +1456,6 @@ def var(x, axis=None, keepdims=False): Returns: A tensor with the variance of elements of `x`. """ - axis = _normalize_axis(axis, ndim(x)) if x.dtype.base_dtype == dtypes_module.bool: x = math_ops.cast(x, floatx()) m = math_ops.reduce_mean(x, axis=axis, keep_dims=True) @@ -1452,7 +1495,6 @@ def mean(x, axis=None, keepdims=False): Returns: A tensor with the mean of elements of `x`. """ - axis = _normalize_axis(axis, ndim(x)) if x.dtype.base_dtype == dtypes_module.bool: x = math_ops.cast(x, floatx()) return math_ops.reduce_mean(x, axis=axis, keep_dims=keepdims) @@ -1469,7 +1511,6 @@ def any(x, axis=None, keepdims=False): Returns: A uint8 tensor (0s and 1s). """ - axis = _normalize_axis(axis, ndim(x)) x = math_ops.cast(x, dtypes_module.bool) return math_ops.reduce_any(x, axis=axis, keep_dims=keepdims) @@ -1485,7 +1526,6 @@ def all(x, axis=None, keepdims=False): Returns: A uint8 tensor (0s and 1s). """ - axis = _normalize_axis(axis, ndim(x)) x = math_ops.cast(x, dtypes_module.bool) return math_ops.reduce_all(x, axis=axis, keep_dims=keepdims) @@ -1500,7 +1540,6 @@ def argmax(x, axis=-1): Returns: A tensor. """ - axis = _normalize_axis(axis, ndim(x)) return math_ops.argmax(x, axis) @@ -1514,7 +1553,6 @@ def argmin(x, axis=-1): Returns: A tensor. """ - axis = _normalize_axis(axis, ndim(x)) return math_ops.argmin(x, axis) @@ -1599,7 +1637,6 @@ def logsumexp(x, axis=None, keepdims=False): Returns: The reduced tensor. """ - axis = _normalize_axis(axis, ndim(x)) return math_ops.reduce_logsumexp(x, axis=axis, keep_dims=keepdims) @@ -1992,24 +2029,45 @@ def repeat_elements(x, rep, axis): rep: Python integer, number of times to repeat. axis: Axis along which to repeat. - Raises: - ValueError: In case `x.shape[axis]` is undefined. - Returns: A tensor. """ x_shape = x.get_shape().as_list() - if x_shape[axis] is None: - raise ValueError('Axis ' + str(axis) + ' of input tensor ' - 'should have a defined dimension, but is None. ' - 'Full tensor shape: ' + str(tuple(x_shape)) + '. ' - 'Typically you need to pass a fully-defined ' - '`input_shape` argument to your first layer.') - # slices along the repeat axis - splits = array_ops.split(value=x, num_or_size_splits=x_shape[axis], axis=axis) - # repeat each slice the given number of reps - x_rep = [s for s in splits for _ in range(rep)] - return concatenate(x_rep, axis) + # For static axis + if x_shape[axis] is not None: + # slices along the repeat axis + splits = array_ops.split(value=x, + num_or_size_splits=x_shape[axis], + axis=axis) + # repeat each slice the given number of reps + x_rep = [s for s in splits for _ in range(rep)] + return concatenate(x_rep, axis) + + # Here we use tf.tile to mimic behavior of np.repeat so that + # we can handle dynamic shapes (that include None). + # To do that, we need an auxiliary axis to repeat elements along + # it and then merge them along the desired axis. + + # Repeating + auxiliary_axis = axis + 1 + x_shape = array_ops.shape(x) + x_rep = array_ops.expand_dims(x, axis=auxiliary_axis) + reps = np.ones(len(x.get_shape()) + 1) + reps[auxiliary_axis] = rep + x_rep = array_ops.tile(x_rep, reps) + + # Merging + reps = np.delete(reps, auxiliary_axis) + reps[axis] = rep + reps = array_ops.constant(reps, dtype='int32') + x_shape *= reps + x_rep = array_ops.reshape(x_rep, x_shape) + + # Fix shape representation + x_shape = x.get_shape().as_list() + x_rep.set_shape(x_shape) + x_rep._keras_shape = tuple(x_shape) + return x_rep def repeat(x, n): @@ -2303,7 +2361,7 @@ def set_value(x, value): value: Value to set the tensor to, as a Numpy array (of the same shape). """ - value = np.asarray(value) + value = np.asarray(value, dtype=dtype(x)) tf_dtype = _convert_string_dtype(x.dtype.name.split('_')[0]) if hasattr(x, '_assign_placeholder'): assign_placeholder = x._assign_placeholder @@ -2327,7 +2385,7 @@ def batch_set_value(tuples): assign_ops = [] feed_dict = {} for x, value in tuples: - value = np.asarray(value) + value = np.asarray(value, dtype=dtype(x)) tf_dtype = _convert_string_dtype(x.dtype.name.split('_')[0]) if hasattr(x, '_assign_placeholder'): assign_placeholder = x._assign_placeholder @@ -2457,11 +2515,16 @@ def stop_gradient(variables): """Returns `variables` but with zero gradient w.r.t. every other variable. Arguments: - variables: List of variables. + variables: Tensor or list of tensors to consider constant with respect + to any other variable. + Returns: - The same list of variables. + A single tensor or a list of tensors (depending on the passed argument) + that has no gradient with respect to any other variable. """ + if isinstance(variables, (list, tuple)): + return map(array_ops.stop_gradient, variables) return array_ops.stop_gradient(variables) @@ -2874,14 +2937,14 @@ def softsign(x): return nn.softsign(x) -def categorical_crossentropy(output, target, from_logits=False): +def categorical_crossentropy(target, output, from_logits=False): """Categorical crossentropy between an output tensor and a target tensor. Arguments: + target: A tensor of the same shape as `output`. output: A tensor resulting from a softmax (unless `from_logits` is True, in which case `output` is expected to be the logits). - target: A tensor of the same shape as `output`. from_logits: Boolean, whether `output` is the result of a softmax, or is a tensor of logits. @@ -2895,8 +2958,8 @@ def categorical_crossentropy(output, target, from_logits=False): output /= math_ops.reduce_sum( output, axis=len(output.get_shape()) - 1, keep_dims=True) # manual computation of crossentropy - epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype) - output = clip_ops.clip_by_value(output, epsilon, 1. - epsilon) + epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype) + output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_) return -math_ops.reduce_sum( target * math_ops.log(output), axis=len(output.get_shape()) - 1) @@ -2904,14 +2967,14 @@ def categorical_crossentropy(output, target, from_logits=False): return nn.softmax_cross_entropy_with_logits(labels=target, logits=output) -def sparse_categorical_crossentropy(output, target, from_logits=False): +def sparse_categorical_crossentropy(target, output, from_logits=False): """Categorical crossentropy with integer targets. Arguments: + target: An integer tensor. output: A tensor resulting from a softmax (unless `from_logits` is True, in which case `output` is expected to be the logits). - target: An integer tensor. from_logits: Boolean, whether `output` is the result of a softmax, or is a tensor of logits. @@ -2921,8 +2984,8 @@ def sparse_categorical_crossentropy(output, target, from_logits=False): # Note: nn.sparse_softmax_cross_entropy_with_logits # expects logits, Keras expects probabilities. if not from_logits: - epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype) - output = clip_ops.clip_by_value(output, epsilon, 1 - epsilon) + epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype) + output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_) output = math_ops.log(output) output_shape = output.get_shape() @@ -2937,12 +3000,12 @@ def sparse_categorical_crossentropy(output, target, from_logits=False): return res -def binary_crossentropy(output, target, from_logits=False): +def binary_crossentropy(target, output, from_logits=False): """Binary crossentropy between an output tensor and a target tensor. Arguments: - output: A tensor. target: A tensor with the same shape as `output`. + output: A tensor. from_logits: Whether `output` is expected to be a logits tensor. By default, we consider that `output` encodes a probability distribution. @@ -2954,8 +3017,8 @@ def binary_crossentropy(output, target, from_logits=False): # expects logits, Keras expects probabilities. if not from_logits: # transform back to logits - epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype) - output = clip_ops.clip_by_value(output, epsilon, 1 - epsilon) + epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype) + output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_) output = math_ops.log(output / (1 - output)) return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output) @@ -3026,7 +3089,7 @@ def dropout(x, level, noise_shape=None, seed=None): return nn.dropout(x * 1., retain_prob, noise_shape, seed=seed) -def l2_normalize(x, axis): +def l2_normalize(x, axis=None): """Normalizes a tensor wrt the L2 norm alongside the specified axis. Arguments: @@ -3036,8 +3099,6 @@ def l2_normalize(x, axis): Returns: A tensor. """ - if axis < 0: - axis %= len(x.get_shape()) return nn.l2_normalize(x, dim=axis) @@ -3807,7 +3868,7 @@ def ctc_label_dense_to_sparse(labels, label_lengths): label_lengths: length of the labels. Returns: - A sparse tensor representation of the lablels. + A sparse tensor representation of the labels. """ label_shape = array_ops.shape(labels) num_batches_tns = array_ops.stack([label_shape[0]]) diff --git a/tensorflow/contrib/keras/python/keras/backend_test.py b/tensorflow/contrib/keras/python/keras/backend_test.py index 69dcf3f094..0717c91c6c 100644 --- a/tensorflow/contrib/keras/python/keras/backend_test.py +++ b/tensorflow/contrib/keras/python/keras/backend_test.py @@ -139,6 +139,33 @@ class BackendUtilsTest(test.TestCase): y_val = f([1])[0] self.assertAllClose(y_val, 1) + def test_is_keras_tensor(self): + x = keras.backend.variable(1) + self.assertEqual(keras.backend.is_keras_tensor(x), False) + x = keras.Input(shape=(1,)) + self.assertEqual(keras.backend.is_keras_tensor(x), True) + with self.assertRaises(ValueError): + keras.backend.is_keras_tensor(0) + + def test_is_placeholder(self): + x = keras.backend.placeholder(shape=(1,)) + self.assertEqual(keras.backend.is_placeholder(x), True) + # Test with TF placeholder + x = keras.backend.array_ops.placeholder(dtype='float32', shape=(1,)) + self.assertEqual(keras.backend.is_placeholder(x), True) + x = keras.backend.variable(1) + self.assertEqual(keras.backend.is_placeholder(x), False) + + def test_stop_gradient(self): + x = keras.backend.variable(1) + y = keras.backend.stop_gradient(x) + self.assertEqual(y.op.name[:12], 'StopGradient') + + xs = [keras.backend.variable(1) for _ in range(3)] + ys = keras.backend.stop_gradient(xs) + for y in ys: + self.assertEqual(y.op.name[:12], 'StopGradient') + class BackendVariableTest(test.TestCase): @@ -408,10 +435,10 @@ class BackendShapeOpsTest(test.TestCase): y = keras.backend.repeat_elements(x, 3, axis=1) self.assertEqual(y.get_shape().as_list(), [1, 9, 2]) - # Invalid use: - with self.assertRaises(ValueError): - x = keras.backend.placeholder(shape=(2, None, 2)) - keras.backend.repeat_elements(x, 3, axis=1) + # Use with a dynamic axis: + x = keras.backend.placeholder(shape=(2, None, 2)) + y = keras.backend.repeat_elements(x, 3, axis=1) + self.assertEqual(y.get_shape().as_list(), [2, None, 2]) def test_repeat(self): x = keras.backend.variable(np.ones((1, 3))) diff --git a/tensorflow/contrib/keras/python/keras/callbacks.py b/tensorflow/contrib/keras/python/keras/callbacks.py index 06a5f4ad8f..323fdddb1f 100644 --- a/tensorflow/contrib/keras/python/keras/callbacks.py +++ b/tensorflow/contrib/keras/python/keras/callbacks.py @@ -397,7 +397,7 @@ class ModelCheckpoint(Callback): if mode not in ['auto', 'min', 'max']: logging.warning('ModelCheckpoint mode %s is unknown, ' - 'fallback to auto mode.' % (mode)) + 'fallback to auto mode.' % mode) mode = 'auto' if mode == 'min': @@ -618,7 +618,7 @@ class TensorBoard(Callback): If you have installed TensorFlow with pip, you should be able to launch TensorBoard from the command line: - ``` + ```sh tensorboard --logdir=/full_path_to_your_logs ``` @@ -686,6 +686,12 @@ class TensorBoard(Callback): tf_summary.histogram(mapped_weight_name, weight) if self.write_grads: grads = model.optimizer.get_gradients(model.total_loss, weight) + + def is_indexed_slices(grad): + return type(grad).__name__ == 'IndexedSlices' + + grads = [grad.values if is_indexed_slices(grad) else grad + for grad in grads] tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if self.write_images: w_img = array_ops.squeeze(weight) @@ -764,6 +770,9 @@ class TensorBoard(Callback): def on_epoch_end(self, epoch, logs=None): logs = logs or {} + if not self.validation_data and self.histogram_freq: + raise ValueError('If printing histograms, validation_data must be ' + 'provided, and cannot be a generator.') if self.validation_data and self.histogram_freq: if epoch % self.histogram_freq == 0: @@ -784,7 +793,11 @@ class TensorBoard(Callback): batch_val.append(val_data[1][i:i + step]) batch_val.append(val_data[2][i:i + step]) if self.model.uses_learning_phase: - batch_val.append(val_data[3]) + # do not slice the learning phase + batch_val = [x[i:i + step] for x in val_data[:-1]] + batch_val.append(val_data[-1]) + else: + batch_val = [x[i:i + step] for x in val_data] feed_dict = dict(zip(tensors, batch_val)) result = self.sess.run([self.merged], feed_dict=feed_dict) summary_str = result[0] @@ -978,6 +991,10 @@ class CSVLogger(Callback): else: return k + if self.model.stop_training: + # We set NA so that csv parsers do not fail for this last epoch. + logs = dict([(k, logs[k]) if k in logs else (k, 'NA') for k in self.keys]) + if not self.writer: self.keys = sorted(logs.keys()) diff --git a/tensorflow/contrib/keras/python/keras/callbacks_test.py b/tensorflow/contrib/keras/python/keras/callbacks_test.py index d8c5c0337f..f255feff41 100644 --- a/tensorflow/contrib/keras/python/keras/callbacks_test.py +++ b/tensorflow/contrib/keras/python/keras/callbacks_test.py @@ -347,7 +347,6 @@ class KerasCallbacksTest(test.TestCase): return model model = make_model() - # This should reduce the LR after the first epoch (due to high epsilon). cbks = [ keras.callbacks.ReduceLROnPlateau( @@ -365,28 +364,10 @@ class KerasCallbacksTest(test.TestCase): callbacks=cbks, epochs=5, verbose=0) - assert np.allclose( + self.assertAllClose( float(keras.backend.get_value(model.optimizer.lr)), 0.01, - atol=keras.backend.epsilon()) - - model = make_model() - cbks = [ - keras.callbacks.ReduceLROnPlateau( - monitor='val_loss', factor=0.1, epsilon=0, patience=1, cooldown=5) - ] - model.fit( - x_train, - y_train, - batch_size=BATCH_SIZE, - validation_data=(x_test, y_test), - callbacks=cbks, - epochs=5, - verbose=0) - assert np.allclose( - float(keras.backend.get_value(model.optimizer.lr)), - 0.1, - atol=keras.backend.epsilon()) + atol=1e-4) def test_CSVLogger(self): with self.test_session(): @@ -465,6 +446,61 @@ class KerasCallbacksTest(test.TestCase): os.remove(filepath) + def test_stop_training_csv(self): + # Test that using the CSVLogger callback with the TerminateOnNaN callback + # does not result in invalid CSVs. + np.random.seed(1337) + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir) + + with self.test_session(): + fp = os.path.join(tmpdir, 'test.csv') + (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES) + + y_test = keras.utils.to_categorical(y_test) + y_train = keras.utils.to_categorical(y_train) + cbks = [keras.callbacks.TerminateOnNaN(), keras.callbacks.CSVLogger(fp)] + model = keras.models.Sequential() + for _ in range(5): + model.add(keras.layers.Dense(2, input_dim=INPUT_DIM, activation='relu')) + model.add(keras.layers.Dense(NUM_CLASSES, activation='linear')) + model.compile(loss='mean_squared_error', + optimizer='rmsprop') + + def data_generator(): + i = 0 + max_batch_index = len(x_train) // BATCH_SIZE + tot = 0 + while 1: + if tot > 3 * len(x_train): + yield (np.ones([BATCH_SIZE, INPUT_DIM]) * np.nan, + np.ones([BATCH_SIZE, NUM_CLASSES]) * np.nan) + else: + yield (x_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE], + y_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]) + i += 1 + tot += 1 + i %= max_batch_index + + history = model.fit_generator(data_generator(), + len(x_train) // BATCH_SIZE, + validation_data=(x_test, y_test), + callbacks=cbks, + epochs=20) + loss = history.history['loss'] + assert len(loss) > 1 + assert loss[-1] == np.inf or np.isnan(loss[-1]) + + values = [] + with open(fp) as f: + for x in csv.reader(f): + values.append(x) + assert 'nan' in values[-1], 'The last epoch was not logged.' + def test_TerminateOnNaN(self): np.random.seed(1337) (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( @@ -593,6 +629,150 @@ class KerasCallbacksTest(test.TestCase): data_generator(True), len(x_train), epochs=2, callbacks=cbks) assert os.path.exists(temp_dir) + def test_TensorBoard_histogram_freq_must_have_validation_data(self): + np.random.seed(1337) + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir) + + with self.test_session(): + filepath = os.path.join(tmpdir, 'logs') + + (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES) + y_test = keras.utils.to_categorical(y_test) + y_train = keras.utils.to_categorical(y_train) + + def data_generator(train): + if train: + max_batch_index = len(x_train) // BATCH_SIZE + else: + max_batch_index = len(x_test) // BATCH_SIZE + i = 0 + while 1: + if train: + # simulate multi-input/output models + yield (x_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE], + y_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]) + else: + yield (x_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE], + y_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]) + i += 1 + i %= max_batch_index + + inp = keras.Input((INPUT_DIM,)) + hidden = keras.layers.Dense(2, activation='relu')(inp) + hidden = keras.layers.Dropout(0.1)(hidden) + output = keras.layers.Dense(NUM_CLASSES, activation='softmax')(hidden) + model = keras.models.Model(inputs=inp, outputs=output) + model.compile(loss='categorical_crossentropy', + optimizer='sgd', + metrics=['accuracy']) + + # we must generate new callbacks for each test, as they aren't stateless + def callbacks_factory(histogram_freq): + return [keras.callbacks.TensorBoard( + log_dir=filepath, + histogram_freq=histogram_freq, + write_images=True, write_grads=True, + embeddings_freq=1, + embeddings_layer_names=['dense_1'], + batch_size=5)] + + # fit w/o validation data should raise ValueError if histogram_freq > 0 + with self.assertRaises(ValueError): + model.fit(x_train, y_train, batch_size=BATCH_SIZE, + callbacks=callbacks_factory(histogram_freq=1), epochs=3) + + # fit generator without validation data should raise ValueError if + # histogram_freq > 0 + with self.assertRaises(ValueError): + model.fit_generator(data_generator(True), len(x_train), epochs=2, + callbacks=callbacks_factory(histogram_freq=1)) + + # fit generator with validation data generator should raise ValueError if + # histogram_freq > 0 + with self.assertRaises(ValueError): + model.fit_generator(data_generator(True), len(x_train), epochs=2, + validation_data=data_generator(False), + validation_steps=1, + callbacks=callbacks_factory(histogram_freq=1)) + + def test_TensorBoard_multi_input_output(self): + np.random.seed(1337) + tmpdir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, tmpdir) + + with self.test_session(): + filepath = os.path.join(tmpdir, 'logs') + + (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( + train_samples=TRAIN_SAMPLES, + test_samples=TEST_SAMPLES, + input_shape=(INPUT_DIM,), + num_classes=NUM_CLASSES) + y_test = keras.utils.to_categorical(y_test) + y_train = keras.utils.to_categorical(y_train) + + def data_generator(train): + if train: + max_batch_index = len(x_train) // BATCH_SIZE + else: + max_batch_index = len(x_test) // BATCH_SIZE + i = 0 + while 1: + if train: + # simulate multi-input/output models + yield ([x_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2, + [y_train[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2) + else: + yield ([x_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2, + [y_test[i * BATCH_SIZE: (i + 1) * BATCH_SIZE]] * 2) + i += 1 + i %= max_batch_index + + inp1 = keras.Input((INPUT_DIM,)) + inp2 = keras.Input((INPUT_DIM,)) + inp = keras.layers.add([inp1, inp2]) + hidden = keras.layers.Dense(2, activation='relu')(inp) + hidden = keras.layers.Dropout(0.1)(hidden) + output1 = keras.layers.Dense(NUM_CLASSES, activation='softmax')(hidden) + output2 = keras.layers.Dense(NUM_CLASSES, activation='softmax')(hidden) + model = keras.models.Model([inp1, inp2], [output1, output2]) + model.compile(loss='categorical_crossentropy', + optimizer='sgd', + metrics=['accuracy']) + + # we must generate new callbacks for each test, as they aren't stateless + def callbacks_factory(histogram_freq): + return [keras.callbacks.TensorBoard(log_dir=filepath, + histogram_freq=histogram_freq, + write_images=True, write_grads=True, + embeddings_freq=1, + embeddings_layer_names=['dense_1'], + batch_size=5)] + + # fit without validation data + model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE, + callbacks=callbacks_factory(histogram_freq=0), epochs=3) + + # fit with validation data and accuracy + model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE, + validation_data=([x_test] * 2, [y_test] * 2), + callbacks=callbacks_factory(histogram_freq=1), epochs=2) + + # fit generator without validation data + model.fit_generator(data_generator(True), len(x_train), epochs=2, + callbacks=callbacks_factory(histogram_freq=0)) + + # fit generator with validation data and accuracy + model.fit_generator(data_generator(True), len(x_train), epochs=2, + validation_data=([x_test] * 2, [y_test] * 2), + callbacks=callbacks_factory(histogram_freq=1)) + assert os.path.isdir(filepath) + def test_LambdaCallback(self): with self.test_session(): np.random.seed(1337) diff --git a/tensorflow/contrib/keras/python/keras/engine/topology.py b/tensorflow/contrib/keras/python/keras/engine/topology.py index 8f69dbf49c..6502ba0f72 100644 --- a/tensorflow/contrib/keras/python/keras/engine/topology.py +++ b/tensorflow/contrib/keras/python/keras/engine/topology.py @@ -89,7 +89,6 @@ class Layer(tf_base_layers.Layer): non_trainable_weights: List of variables. weights: The concatenation of the lists trainable_weights and non_trainable_weights (in this order). - constraints: Dict mapping weights to constraints. # Methods call(x, mask=None): Where the layer's logic lives. @@ -647,7 +646,6 @@ class Network(tf_base_layers.Network, Layer): outbound_nodes: list of nodes trainable_weights (list of variables) non_trainable_weights (list of variables) - constraints (list of tuples (weight, constraint)) # Methods summary @@ -843,6 +841,8 @@ class Network(tf_base_layers.Network, Layer): layer, node_index, tensor_index = self._input_coordinates[i] node_key = tf_base_layers._make_node_key(layer.name, node_index) + if node_key not in self._network_nodes: + continue new_node_index = node_conversion_map[node_key] model_inputs.append([layer.name, new_node_index, tensor_index]) config['input_layers'] = model_inputs @@ -851,6 +851,8 @@ class Network(tf_base_layers.Network, Layer): layer, node_index, tensor_index = self._output_coordinates[i] node_key = tf_base_layers._make_node_key(layer.name, node_index) + if node_key not in self._network_nodes: + continue new_node_index = node_conversion_map[node_key] model_outputs.append([layer.name, new_node_index, tensor_index]) config['output_layers'] = model_outputs @@ -872,10 +874,61 @@ class Network(tf_base_layers.Network, Layer): Raises: ValueError: In case of improperly formatted config dict. """ - # layer instances created during + # Layer instances created during # the graph reconstruction process created_layers = {} + # Dictionary mapping layer instances to + # node data that specifies a layer call. + # It acts as a queue that maintains any unprocessed + # layer call until it becomes possible to process it + # (i.e. until the input tensors to the call all exist). + unprocessed_nodes = {} + + def add_unprocessed_node(layer, node_data): + if layer not in unprocessed_nodes: + unprocessed_nodes[layer] = [node_data] + else: + unprocessed_nodes[layer].append(node_data) + + def process_node(layer, node_data): + """Deserialize a node. + + Arguments: + layer: layer instance. + node_data: node config dict. + + Raises: + ValueError: In case of improperly formatted `node_data` dict. + """ + input_tensors = [] + for input_data in node_data: + inbound_layer_name = input_data[0] + inbound_node_index = input_data[1] + inbound_tensor_index = input_data[2] + if len(input_data) == 3: + kwargs = {} + elif len(input_data) == 4: + kwargs = input_data[3] + else: + raise ValueError('Improperly formatted model config.') + if inbound_layer_name not in created_layers: + add_unprocessed_node(layer, node_data) + return + inbound_layer = created_layers[inbound_layer_name] + if len(inbound_layer.inbound_nodes) <= inbound_node_index: + add_unprocessed_node(layer, node_data) + return + inbound_node = inbound_layer.inbound_nodes[inbound_node_index] + input_tensors.append(inbound_node.output_tensors[inbound_tensor_index]) + # Call layer on its inputs, thus creating the node + # and building the layer if needed. + if input_tensors: + if len(input_tensors) == 1: + layer(input_tensors[0], **kwargs) + else: + layer(input_tensors, **kwargs) + def process_layer(layer_data): """Deserialize a layer, then call it on appropriate inputs. @@ -889,39 +942,32 @@ class Network(tf_base_layers.Network, Layer): # Instantiate layer. from tensorflow.contrib.keras.python.keras.layers import deserialize as deserialize_layer # pylint: disable=g-import-not-at-top + layer = deserialize_layer(layer_data, custom_objects=custom_objects) created_layers[layer_name] = layer # Gather layer inputs. inbound_nodes_data = layer_data['inbound_nodes'] for node_data in inbound_nodes_data: - input_tensors = [] - for input_data in node_data: - inbound_layer_name = input_data[0] - inbound_node_index = input_data[1] - inbound_tensor_index = input_data[2] - if len(input_data) == 3: - kwargs = {} - elif len(input_data) == 4: - kwargs = input_data[3] - else: - raise ValueError('Improperly formatted model config.') - if inbound_layer_name not in created_layers: - raise ValueError('Missing layer: ' + inbound_layer_name) - inbound_layer = created_layers[inbound_layer_name] - inbound_node = inbound_layer.inbound_nodes[inbound_node_index] - input_tensors.append( - inbound_node.output_tensors[inbound_tensor_index]) - # Call layer on its inputs, thus creating the node - # and building the layer if needed. - if input_tensors: - if len(input_tensors) == 1: - layer(input_tensors[0], **kwargs) - else: - layer(input_tensors, **kwargs) + # We don't process nodes (i.e. make layer calls) + # on the fly because the inbound node may not yet exist, + # in case of layer shared at different topological depths + # (e.g. a model such as A(B(A(B(x))))) + add_unprocessed_node(layer, node_data) + # First, we create all layers and enqueue nodes to be processed for layer_data in config['layers']: process_layer(layer_data) + # Then we process nodes in order of layer depth. + # Nodes that cannot yet be processed (if the inbound node + # does not yet exist) are re-enqueued, and the process + # is repeated until all nodes are processed. + while unprocessed_nodes: + for layer_data in config['layers']: + layer = created_layers[layer_data['name']] + if layer in unprocessed_nodes: + for node_data in unprocessed_nodes.pop(layer): + process_node(layer, node_data) name = config.get('name') input_tensors = [] diff --git a/tensorflow/contrib/keras/python/keras/engine/topology_test.py b/tensorflow/contrib/keras/python/keras/engine/topology_test.py index fa099515ab..0fe775cf66 100644 --- a/tensorflow/contrib/keras/python/keras/engine/topology_test.py +++ b/tensorflow/contrib/keras/python/keras/engine/topology_test.py @@ -637,5 +637,53 @@ class TopologyConstructionTest(test.TestCase): _ = keras.engine.topology.preprocess_weights_for_loading( model, model.weights, original_keras_version='1') + def test_layer_sharing_at_heterogenous_depth(self): + with self.test_session(): + x_val = np.random.random((10, 5)) + + x = keras.Input(shape=(5,)) + a = keras.layers.Dense(5, name='A') + b = keras.layers.Dense(5, name='B') + output = a(b(a(b(x)))) + m = keras.models.Model(x, output) + + output_val = m.predict(x_val) + + config = m.get_config() + weights = m.get_weights() + + m2 = keras.models.Model.from_config(config) + m2.set_weights(weights) + + output_val_2 = m2.predict(x_val) + self.assertAllClose(output_val, output_val_2, atol=1e-6) + + def test_layer_sharing_at_heterogenous_depth_with_concat(self): + with self.test_session(): + input_shape = (16, 9, 3) + input_layer = keras.Input(shape=input_shape) + + a = keras.layers.Dense(3, name='dense_A') + b = keras.layers.Dense(3, name='dense_B') + c = keras.layers.Dense(3, name='dense_C') + + x1 = b(a(input_layer)) + x2 = a(c(input_layer)) + output = keras.layers.concatenate([x1, x2]) + + m = keras.models.Model(inputs=input_layer, outputs=output) + + x_val = np.random.random((10, 16, 9, 3)) + output_val = m.predict(x_val) + + config = m.get_config() + weights = m.get_weights() + + m2 = keras.models.Model.from_config(config) + m2.set_weights(weights) + + output_val_2 = m2.predict(x_val) + self.assertAllClose(output_val, output_val_2, atol=1e-6) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/keras/python/keras/engine/training.py b/tensorflow/contrib/keras/python/keras/engine/training.py index fabfa537d8..619ae74b66 100644 --- a/tensorflow/contrib/keras/python/keras/engine/training.py +++ b/tensorflow/contrib/keras/python/keras/engine/training.py @@ -22,7 +22,6 @@ from __future__ import print_function import copy import numpy as np -import six from tensorflow.contrib.keras.python.keras import backend as K from tensorflow.contrib.keras.python.keras import callbacks as cbks @@ -65,6 +64,9 @@ def _standardize_input_data(data, ValueError: in case of improperly formatted user-provided data. """ if not names: + if data is not None and hasattr(data, '__len__') and len(data): + raise ValueError('Error when checking model ' + exception_prefix + ': ' + 'expected no data, but got:', data) return [] if data is None: return [None for _ in range(len(names))] @@ -83,7 +85,7 @@ def _standardize_input_data(data, ': the list of Numpy arrays ' 'that you are passing to your model ' 'is not the size the model expected. ' - 'Expected to see ' + str(len(names)) + ' arrays but instead got ' + 'Expected to see ' + str(len(names)) + ' array(s), but instead got ' 'the following list of ' + str(len(data)) + ' arrays: ' + str(data)[:200] + '...') else: @@ -441,6 +443,7 @@ def _weighted_masked_objective(fn): # score_array has ndim >= 2 score_array = fn(y_true, y_pred) if mask is not None: + # Cast the mask to floatX to avoid float64 upcasting in theano mask = K.cast(mask, K.floatx()) # mask should have the same shape as score_array score_array *= mask @@ -461,47 +464,6 @@ def _weighted_masked_objective(fn): return weighted -def _masked_objective(fn): - """Adds support for masking to an objective function. - - It transforms an objective function `fn(y_true, y_pred)` - into a cost-masked objective function - `fn(y_true, y_pred, mask)`. - - Arguments: - fn: The objective function to wrap, - with signature `fn(y_true, y_pred)`. - - Returns: - A function with signature `fn(y_true, y_pred, mask)`. - """ - - def masked(y_true, y_pred, mask=None): - """Wrapper function. - - Arguments: - y_true: `y_true` argument of `fn`. - y_pred: `y_pred` argument of `fn`. - mask: Mask tensor. - - Returns: - Scalar tensor. - """ - # score_array has ndim >= 2 - score_array = fn(y_true, y_pred) - if mask is not None: - mask = K.cast(mask, K.floatx()) - # mask should have the same shape as score_array - score_array *= mask - # the loss per batch should be proportional - # to the number of unmasked samples. - score_array /= K.mean(mask) - - return K.mean(score_array) - - return masked - - def _standardize_weights(y, sample_weight=None, class_weight=None, @@ -607,19 +569,21 @@ class Model(Container): metrics=None, loss_weights=None, sample_weight_mode=None, + weighted_metrics=None, + target_tensors=None, **kwargs): """Configures the model for training. Arguments: - optimizer: str (name of optimizer) or optimizer object. + optimizer: String (name of optimizer) or optimizer object. See [optimizers](/optimizers). - loss: str (name of objective function) or objective function. + loss: String (name of objective function) or objective function. See [losses](/losses). If the model has multiple outputs, you can use a different loss on each output by passing a dictionary or a list of losses. The loss value that will be minimized by the model will then be the sum of all individual losses. - metrics: list of metrics to be evaluated by the model + metrics: List of metrics to be evaluated by the model during training and testing. Typically you will use `metrics=['accuracy']`. To specify different metrics for different outputs of a @@ -634,18 +598,29 @@ class Model(Container): If a list, it is expected to have a 1:1 mapping to the model's outputs. If a tensor, it is expected to map output names (strings) to scalar coefficients. - sample_weight_mode: if you need to do timestep-wise + sample_weight_mode: If you need to do timestep-wise sample weighting (2D weights), set this to `"temporal"`. `None` defaults to sample-wise weights (1D). If the model has multiple outputs, you can use a different `sample_weight_mode` on each output by passing a dictionary or a list of modes. - **kwargs: Additional arguments passed to `tf.Session.run`. + weighted_metrics: List of metrics to be evaluated and weighted + by sample_weight or class_weight during training and testing. + target_tensors: By default, Keras will create placeholders for the + model's target, which will be fed with the target data during + training. If instead you would like to use your own + target tensors (in turn, Keras will not expect external + Numpy data for these targets at training time), you + can specify them via the `target_tensors` argument. It can be + a single tensor (for a single-output model), a list of tensors, + or a dict mapping output names to target tensors. + **kwargs: When using the Theano/CNTK backends, these arguments + are passed into K.function. When using the TensorFlow backend, + these arguments are passed into `tf.Session.run`. Raises: ValueError: In case of invalid arguments for `optimizer`, `loss`, `metrics` or `sample_weight_mode`. - RuntimeError: In case of ill-formulated optimization problem. """ loss = loss or {} self.optimizer = optimizers.get(optimizer) @@ -682,19 +657,16 @@ class Model(Container): loss_functions = [loss_function for _ in range(len(self.outputs))] self.loss_functions = loss_functions weighted_losses = [_weighted_masked_objective(fn) for fn in loss_functions] - skip_indices = [] + skip_target_indices = [] + skip_target_weighing_indices = [] self._feed_outputs = [] self._feed_output_names = [] self._feed_output_shapes = [] self._feed_loss_fns = [] for i in range(len(weighted_losses)): if weighted_losses[i] is None: - skip_indices.append(i) - else: - self._feed_outputs.append(self.outputs[i]) - self._feed_output_names.append(self.output_names[i]) - self._feed_output_shapes.append(self.internal_output_shapes[i]) - self._feed_loss_fns.append(self.loss_functions[i]) + skip_target_indices.append(i) + skip_target_weighing_indices.append(i) # Prepare output masks. masks = self.compute_mask(self.inputs, mask=None) @@ -728,6 +700,57 @@ class Model(Container): raise TypeError('Could not interpret loss_weights argument: ' + str(loss_weights) + ' - expected a list of dicts.') + # Prepare targets of model. + self.targets = [] + self._feed_targets = [] + if target_tensors is not None: + if isinstance(target_tensors, list): + if len(target_tensors) != len(self.outputs): + raise ValueError('When passing a list as `target_tensors`, ' + 'it should have one entry per model outputs. ' + 'The model has ' + str(len(self.outputs)) + + ' outputs, but you passed target_tensors=' + + str(target_tensors)) + elif isinstance(target_tensors, dict): + for name in target_tensors: + if name not in self.output_names: + raise ValueError('Unknown entry in `target_tensors` ' + 'dictionary: "' + name + '". ' + 'Only expected the following keys: ' + + str(self.output_names)) + target_tensors_ = [] + for name in self.output_names: + target_tensors_.append(target_tensors.get(name, None)) + target_tensors = target_tensors_ + else: + raise TypeError('Expected `target_tensors` to be ' + 'a list or dict, but got:', target_tensors) + for i in range(len(self.outputs)): + if i in skip_target_indices: + self.targets.append(None) + else: + shape = self.internal_output_shapes[i] + name = self.output_names[i] + if target_tensors is not None: + target = target_tensors[i] + else: + target = None + if target is None or K.is_placeholder(target): + if target is None: + target = K.placeholder( + ndim=len(shape), + name=name + '_target', + sparse=K.is_sparse(self.outputs[i]), + dtype=K.dtype(self.outputs[i])) + self._feed_targets.append(target) + self._feed_outputs.append(self.outputs[i]) + self._feed_output_names.append(name) + self._feed_output_shapes.append(shape) + self._feed_loss_fns.append(self.loss_functions[i]) + else: + skip_target_weighing_indices.append(i) + self.targets.append(target) + # Prepare sample weights. sample_weights = [] sample_weight_modes = [] @@ -739,7 +762,7 @@ class Model(Container): 'Only expected the following keys: ' + str(self.output_names)) for i, name in enumerate(self.output_names): - if i in skip_indices: + if i in skip_target_weighing_indices: weight = None sample_weight_modes.append(None) else: @@ -762,7 +785,7 @@ class Model(Container): ' outputs, but you passed ' 'sample_weight_mode=' + str(sample_weight_mode)) for i in range(len(self.output_names)): - if i in skip_indices: + if i in skip_target_weighing_indices: weight = None sample_weight_modes.append(None) else: @@ -777,7 +800,7 @@ class Model(Container): sample_weights.append(weight) else: for i, name in enumerate(self.output_names): - if i in skip_indices: + if i in skip_target_weighing_indices: sample_weight_modes.append(None) sample_weights.append(None) else: @@ -792,111 +815,112 @@ class Model(Container): self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] for i in range(len(self.outputs)): - if i not in skip_indices: + if i not in skip_target_weighing_indices: self._feed_sample_weight_modes.append(self.sample_weight_modes[i]) - # Prepare targets of model. - self.targets = [] - self._feed_targets = [] - for i in range(len(self.outputs)): - if i in skip_indices: - self.targets.append(None) - else: - shape = self.internal_output_shapes[i] - name = self.output_names[i] - target = K.placeholder( - ndim=len(shape), - name=name + '_target', - sparse=K.is_sparse(self.outputs[i]), - dtype=K.dtype(self.outputs[i])) - self.targets.append(target) - self._feed_targets.append(target) - # Prepare metrics. self.metrics = metrics + self.weighted_metrics = weighted_metrics self.metrics_names = ['loss'] self.metrics_tensors = [] # Compute total loss. total_loss = None - for i in range(len(self.outputs)): - if i in skip_indices: - continue - y_true = self.targets[i] - y_pred = self.outputs[i] - weighted_loss = weighted_losses[i] - sample_weight = sample_weights[i] - mask = masks[i] - loss_weight = loss_weights_list[i] - output_loss = weighted_loss(y_true, y_pred, sample_weight, mask) - if len(self.outputs) > 1: - self.metrics_tensors.append(output_loss) - self.metrics_names.append(self.output_names[i] + '_loss') + with K.name_scope('loss'): + for i in range(len(self.outputs)): + if i in skip_target_indices: + continue + y_true = self.targets[i] + y_pred = self.outputs[i] + weighted_loss = weighted_losses[i] + sample_weight = sample_weights[i] + mask = masks[i] + loss_weight = loss_weights_list[i] + with K.name_scope(self.output_names[i] + '_loss'): + output_loss = weighted_loss(y_true, y_pred, sample_weight, mask) + if len(self.outputs) > 1: + self.metrics_tensors.append(output_loss) + self.metrics_names.append(self.output_names[i] + '_loss') + if total_loss is None: + total_loss = loss_weight * output_loss + else: + total_loss += loss_weight * output_loss if total_loss is None: - total_loss = loss_weight * output_loss - else: - total_loss += loss_weight * output_loss - if total_loss is None: - if not self.losses: - raise RuntimeError('The model cannot be compiled ' + if not self.losses: + raise ValueError('The model cannot be compiled ' 'because it has no loss to optimize.') - else: - total_loss = 0. + else: + total_loss = 0. - # Add regularization penalties - # and other layer-specific losses. - for loss_tensor in self.losses: - total_loss += loss_tensor + # Add regularization penalties + # and other layer-specific losses. + for loss_tensor in self.losses: + total_loss += loss_tensor # List of same size as output_names. # contains tuples (metrics for output, names of metrics). nested_metrics = _collect_metrics(metrics, self.output_names) + nested_weighted_metrics = _collect_metrics(weighted_metrics, + self.output_names) - def append_metric(layer_num, metric_name, metric_tensor): + def append_metric(layer_index, metric_name, metric_tensor): """Helper function used in loop below.""" if len(self.output_names) > 1: - metric_name = self._output_layers[layer_num].name + '_' + metric_name + metric_name = self.output_names[layer_index] + '_' + metric_name self.metrics_names.append(metric_name) self.metrics_tensors.append(metric_tensor) - for i in range(len(self.outputs)): - if i in skip_indices: - continue - y_true = self.targets[i] - y_pred = self.outputs[i] - output_metrics = nested_metrics[i] - for metric in output_metrics: - if metric == 'accuracy' or metric == 'acc': - # custom handling of accuracy - # (because of class mode duality) - output_shape = self.internal_output_shapes[i] - acc_fn = None - if (output_shape[-1] == 1 or - self.loss_functions[i] == losses.binary_crossentropy): - # case: binary accuracy - acc_fn = metrics_module.binary_accuracy - elif self.loss_functions[i] == losses.sparse_categorical_crossentropy: - # case: categorical accuracy with sparse targets - acc_fn = metrics_module.sparse_categorical_accuracy - else: - acc_fn = metrics_module.categorical_accuracy + with K.name_scope('metrics'): + for i in range(len(self.outputs)): + if i in skip_target_indices: + continue - masked_fn = _masked_objective(acc_fn) - append_metric(i, 'acc', masked_fn(y_true, y_pred, mask=masks[i])) - else: - metric_fn = metrics_module.get(metric) - masked_metric_fn = _masked_objective(metric_fn) - metric_result = masked_metric_fn(y_true, y_pred, mask=masks[i]) - metric_result = {metric_fn.__name__: metric_result} - for name, tensor in six.iteritems(metric_result): - append_metric(i, name, tensor) + y_true = self.targets[i] + y_pred = self.outputs[i] + weights = sample_weights[i] + output_metrics = nested_metrics[i] + output_weighted_metrics = nested_weighted_metrics[i] + + def handle_metrics(metrics, weights=None): + metric_name_prefix = 'weighted_' if weights is not None else '' + + for metric in metrics: + if metric == 'accuracy' or metric == 'acc': + # custom handling of accuracy + # (because of class mode duality) + output_shape = self.internal_output_shapes[i] + if (output_shape[-1] == 1 or + self.loss_functions[i] == losses.binary_crossentropy): + # case: binary accuracy + acc_fn = metrics_module.binary_accuracy + elif self.loss_functions[ + i] == losses.sparse_categorical_crossentropy: + # case: categorical accuracy with sparse targets + acc_fn = metrics_module.sparse_categorical_accuracy + else: + acc_fn = metrics_module.categorical_accuracy + + weighted_metric_fn = _weighted_masked_objective(acc_fn) + metric_name = metric_name_prefix + 'acc' + else: + metric_fn = metrics_module.get(metric) + weighted_metric_fn = _weighted_masked_objective(metric_fn) + metric_name = metric_name_prefix + metric_fn.__name__ + + with K.name_scope(metric_name): + metric_result = weighted_metric_fn( + y_true, y_pred, weights=weights, mask=masks[i]) + append_metric(i, metric_name, metric_result) + + handle_metrics(output_metrics) + handle_metrics(output_weighted_metrics, weights=weights) # Prepare gradient updates and state updates. self.total_loss = total_loss self.sample_weights = sample_weights self._feed_sample_weights = [] for i in range(len(self.sample_weights)): - if i not in skip_indices: + if i not in skip_target_weighing_indices: self._feed_sample_weights.append(sample_weights[i]) # Functions for train, test and predict will @@ -917,30 +941,30 @@ class Model(Container): raise RuntimeError('You must compile your model before using it.') if self.train_function is None: inputs = (self._feed_inputs + - self._feed_targets + self._feed_sample_weights) + self._feed_targets + + self._feed_sample_weights) if self.uses_learning_phase and not isinstance(K.learning_phase(), int): inputs += [K.learning_phase()] - constraints = {} - for w in self._collected_trainable_weights: - if hasattr(w, 'constraint') and w.constraint is not None: - constraints[w] = w.constraint - training_updates = self.optimizer.get_updates( - self._collected_trainable_weights, constraints, self.total_loss) - updates = self.updates + training_updates - # Gets loss and metrics. Updates weights at each call. - self.train_function = K.function( - inputs, [self.total_loss] + self.metrics_tensors, - updates=updates, - name='train_function', - **self._function_kwargs) + with K.name_scope('training'): + with K.name_scope(self.optimizer.__class__.__name__): + training_updates = self.optimizer.get_updates( + params=self._collected_trainable_weights, loss=self.total_loss) + updates = self.updates + training_updates + # Gets loss and metrics. Updates weights at each call. + self.train_function = K.function( + inputs, [self.total_loss] + self.metrics_tensors, + updates=updates, + name='train_function', + **self._function_kwargs) def _make_test_function(self): if not hasattr(self, 'test_function'): raise RuntimeError('You must compile your model before using it.') if self.test_function is None: inputs = (self._feed_inputs + - self._feed_targets + self._feed_sample_weights) + self._feed_targets + + self._feed_sample_weights) if self.uses_learning_phase and not isinstance(K.learning_phase(), int): inputs += [K.learning_phase()] # Return loss and metrics, no gradient updates. @@ -969,11 +993,54 @@ class Model(Container): name='predict_function', **kwargs) + def _check_num_samples(self, + ins, + batch_size=None, + steps=None, + steps_name='steps'): + """Determine the number of samples provided for training and evaluation. + + The number of samples is not defined when running with `steps`, + in which case the number of samples is set to `None`. + + Arguments: + ins: List of tensors to be fed to the Keras function. + batch_size: Integer batch size or `None` if not defined. + steps: Total number of steps (batches of samples) + before declaring `_predict_loop` finished. + Ignored with the default value of `None`. + steps_name: The public API's parameter name for `steps`. + + Raises: + ValueError: when `steps` is `None` and the attribute `ins.shape` + does not exist. Also raises ValueError when `steps` is not `None` + and `batch_size` is not `None` because they are mutually + exclusive. + + Returns: + When steps is `None`, returns the number of samples to be + processed based on the size of the first dimension of the + first input numpy array. When steps is not `None` and + `batch_size` is `None`, returns `None`. + """ + if steps is not None: + num_samples = None + if batch_size is not None: + raise ValueError('If ' + steps_name + + ' is set, the `batch_size` must be None.') + elif ins and hasattr(ins[0], 'shape'): + num_samples = ins[0].shape[0] + else: + raise ValueError('Either the input data should have ' + 'a defined shape, or ' + steps_name + + ' should be specified.') + return num_samples + def _fit_loop(self, f, ins, out_labels=None, - batch_size=32, + batch_size=None, epochs=100, verbose=1, callbacks=None, @@ -981,55 +1048,70 @@ class Model(Container): val_ins=None, shuffle=True, callback_metrics=None, - initial_epoch=0): + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None): """Abstract fit function for `f(ins)`. Assume that f returns a list, labeled by out_labels. Arguments: f: Keras function returning a list of tensors - ins: list of tensors to be fed to `f` - out_labels: list of strings, display names of + ins: List of tensors to be fed to `f` + out_labels: List of strings, display names of the outputs of `f` - batch_size: integer batch size - epochs: number of times to iterate over the data - verbose: verbosity mode, 0, 1 or 2 - callbacks: list of callbacks to be called during training + batch_size: Integer batch size or None if unknown. + epochs: Number of times to iterate over the data + verbose: Verbosity mode, 0, 1 or 2 + callbacks: List of callbacks to be called during training val_f: Keras function to call for validation - val_ins: list of tensors to be fed to `val_f` - shuffle: whether to shuffle the data at the beginning of each epoch - callback_metrics: list of strings, the display names of the metrics + val_ins: List of tensors to be fed to `val_f` + shuffle: Whether to shuffle the data at the beginning of each epoch + callback_metrics: List of strings, the display names of the metrics passed to the callbacks. They should be the concatenation of list the display names of the outputs of `f` and the list of display names of the outputs of `f_val`. - initial_epoch: epoch at which to start training + initial_epoch: Epoch at which to start training (useful for resuming a previous training run) + steps_per_epoch: Total number of steps (batches of samples) + before declaring one epoch finished and starting the + next epoch. Ignored with the default value of `None`. + validation_steps: Number of steps to run validation for (only if doing + validation from data tensors). Ignored with default value of `None`. Returns: `History` object. + + Raises: + ValueError: In case of invalid argument values. """ do_validation = False if val_f and val_ins: do_validation = True - if verbose: + if (verbose and ins and + hasattr(ins[0], 'shape') and hasattr(val_ins[0], 'shape')): print('Train on %d samples, validate on %d samples' % (ins[0].shape[0], val_ins[0].shape[0])) + if validation_steps: + if steps_per_epoch is None: + raise ValueError('Can only use `validation_steps` when doing step-wise ' + 'training, i.e. `steps_per_epoch` must be set.') + do_validation = True - if ins and hasattr(ins[0], 'shape'): - num_train_samples = ins[0].shape[0] - else: - # May happen if we are running `fit` without Numpy input data, - # i.e. if all inputs to the models are data tensors - # instead of placeholders. - # In that case we will run `fit` over a single batch. - num_train_samples = batch_size - verbose = 2 - index_array = np.arange(num_train_samples) + num_train_samples = self._check_num_samples( + ins, batch_size, steps_per_epoch, 'steps_per_epoch') + + if num_train_samples is not None: + index_array = np.arange(num_train_samples) self.history = cbks.History() callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history] if verbose: - callbacks += [cbks.ProgbarLogger()] + if steps_per_epoch is not None: + count_mode = 'steps' + else: + count_mode = 'samples' + callbacks += [cbks.ProgbarLogger(count_mode)] callbacks = cbks.CallbackList(callbacks) out_labels = out_labels or [] @@ -1044,6 +1126,7 @@ class Model(Container): callbacks.set_params({ 'batch_size': batch_size, 'epochs': epochs, + 'steps': steps_per_epoch, 'samples': num_train_samples, 'verbose': verbose, 'do_validation': do_validation, @@ -1056,55 +1139,85 @@ class Model(Container): for epoch in range(initial_epoch, epochs): callbacks.on_epoch_begin(epoch) - if shuffle == 'batch': - index_array = _batch_shuffle(index_array, batch_size) - elif shuffle: - np.random.shuffle(index_array) - - batches = _make_batches(num_train_samples, batch_size) epoch_logs = {} - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - try: - if isinstance(ins[-1], float): - # Do not slice the training phase flag. - ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = _slice_arrays(ins, batch_ids) - except TypeError: - raise TypeError('TypeError while preparing batch. ' - 'If using HDF5 input data, ' - 'pass shuffle="batch".') - batch_logs = {} - batch_logs['batch'] = batch_index - batch_logs['size'] = len(batch_ids) - callbacks.on_batch_begin(batch_index, batch_logs) - outs = f(ins_batch) - if not isinstance(outs, list): - outs = [outs] - for l, o in zip(out_labels, outs): - batch_logs[l] = o + if steps_per_epoch is not None: + for step_index in range(steps_per_epoch): + batch_logs = {} + batch_logs['batch'] = step_index + batch_logs['size'] = 1 + callbacks.on_batch_begin(step_index, batch_logs) + outs = f(ins) - callbacks.on_batch_end(batch_index, batch_logs) - if callback_model.stop_training: - break + if not isinstance(outs, list): + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o - if batch_index == len(batches) - 1: # Last batch. - if do_validation: - val_outs = self._test_loop( - val_f, val_ins, batch_size=batch_size, verbose=0) - if not isinstance(val_outs, list): - val_outs = [val_outs] - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o + callbacks.on_batch_end(step_index, batch_logs) + if callback_model.stop_training: + break + + if do_validation: + val_outs = self._test_loop( + val_f, + val_ins, + batch_size=batch_size, + steps=validation_steps, + verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. + for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + else: + if shuffle == 'batch': + index_array = _batch_shuffle(index_array, batch_size) + elif shuffle: + np.random.shuffle(index_array) + + batches = _make_batches(num_train_samples, batch_size) + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + try: + if isinstance(ins[-1], float): + # Do not slice the training phase flag. + ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + else: + ins_batch = _slice_arrays(ins, batch_ids) + except TypeError: + raise TypeError('TypeError while preparing batch. ' + 'If using HDF5 input data, ' + 'pass shuffle="batch".') + batch_logs = {} + batch_logs['batch'] = batch_index + batch_logs['size'] = len(batch_ids) + callbacks.on_batch_begin(batch_index, batch_logs) + outs = f(ins_batch) + if not isinstance(outs, list): + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o + + callbacks.on_batch_end(batch_index, batch_logs) + if callback_model.stop_training: + break + + if batch_index == len(batches) - 1: # Last batch. + if do_validation: + val_outs = self._test_loop( + val_f, val_ins, batch_size=batch_size, verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. + for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o callbacks.on_epoch_end(epoch, epoch_logs) if callback_model.stop_training: break callbacks.on_train_end() return self.history - def _predict_loop(self, f, ins, batch_size=32, verbose=0): + def _predict_loop(self, f, ins, batch_size=32, verbose=0, steps=None): """Abstract method to loop over some data in batches. Arguments: @@ -1112,58 +1225,85 @@ class Model(Container): ins: list of tensors to be fed to `f`. batch_size: integer batch size. verbose: verbosity mode. + steps: Total number of steps (batches of samples) + before declaring `_predict_loop` finished. + Ignored with the default value of `None`. Returns: Array of predictions (if the model has a single output) or list of arrays of predictions (if the model has multiple outputs). """ - if ins and hasattr(ins[0], 'shape'): - samples = ins[0].shape[0] - else: - # May happen if we are running `predict` without Numpy input data, - # i.e. if all inputs to the models are data tensors - # instead of placeholders. - # In that case we will run `predict` over a single batch. - samples = batch_size - verbose = 2 - outs = [] + num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') if verbose == 1: - progbar = Progbar(target=samples) - batches = _make_batches(samples, batch_size) - index_array = np.arange(samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], float): - # Do not slice the training phase flag. - ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + if steps is not None: + progbar = Progbar(target=steps) else: - ins_batch = _slice_arrays(ins, batch_ids) - - batch_outs = f(ins_batch) - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - if batch_index == 0: - for batch_out in batch_outs: - shape = (samples,) + batch_out.shape[1:] - outs.append(np.zeros(shape, dtype=batch_out.dtype)) - - for i, batch_out in enumerate(batch_outs): - outs[i][batch_start:batch_end] = batch_out - if verbose == 1: - progbar.update(batch_end) - if len(outs) == 1: - return outs[0] - return outs + progbar = Progbar(target=num_samples) + if steps is not None: + # Step-based predictions. + # Since we do not know how many samples + # we will see, we cannot pre-allocate + # the returned Numpy arrays. + # Instead, we store one array per batch seen + # and concatenate them upon returning. + unconcatenated_outs = [] + for step in range(steps): + batch_outs = f(ins) + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + if step == 0: + for batch_out in batch_outs: + unconcatenated_outs.append([]) + for i, batch_out in enumerate(batch_outs): + unconcatenated_outs[i].append(batch_out) + if verbose == 1: + progbar.update(step) + if len(unconcatenated_outs) == 1: + return np.concatenate(unconcatenated_outs[0], axis=0) + return [ + np.concatenate(unconcatenated_outs[i], axis=0) + for i in range(len(unconcatenated_outs)) + ] + else: + # Sample-based predictions. + outs = [] + batches = _make_batches(num_samples, batch_size) + index_array = np.arange(num_samples) + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + if ins and isinstance(ins[-1], float): + # Do not slice the training phase flag. + ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + else: + ins_batch = _slice_arrays(ins, batch_ids) + batch_outs = f(ins_batch) + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + if batch_index == 0: + # Pre-allocate the results arrays. + for batch_out in batch_outs: + shape = (num_samples,) + batch_out.shape[1:] + outs.append(np.zeros(shape, dtype=batch_out.dtype)) + for i, batch_out in enumerate(batch_outs): + outs[i][batch_start:batch_end] = batch_out + if verbose == 1: + progbar.update(batch_end) + if len(outs) == 1: + return outs[0] + return outs - def _test_loop(self, f, ins, batch_size=32, verbose=0): + def _test_loop(self, f, ins, batch_size=None, verbose=0, steps=None): """Abstract method to loop over some data in batches. Arguments: f: Keras function returning a list of tensors. ins: list of tensors to be fed to `f`. - batch_size: integer batch size. + batch_size: integer batch size or `None`. verbose: verbosity mode. + steps: Total number of steps (batches of samples) + before declaring predictions finished. + Ignored with the default value of `None`. Returns: Scalar loss (if the model has a single output and no metrics) @@ -1171,45 +1311,56 @@ class Model(Container): and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. """ - if ins and hasattr(ins[0], 'shape'): - samples = ins[0].shape[0] - else: - # May happen if we are running `evaluate` without Numpy input data, - # i.e. if all inputs to the models are data tensors - # instead of placeholders. - # In that case we will run `evaluate` over a single batch. - samples = batch_size - verbose = 2 - + num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') outs = [] - if verbose == 1: - progbar = Progbar(target=samples) - batches = _make_batches(samples, batch_size) - index_array = np.arange(samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], float): - # Do not slice the training phase flag. - ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = _slice_arrays(ins, batch_ids) - - batch_outs = f(ins_batch) - if isinstance(batch_outs, list): - if batch_index == 0: - for batch_out in enumerate(batch_outs): + if steps is not None: + if verbose == 1: + progbar = Progbar(target=steps) + for step in range(steps): + batch_outs = f(ins) + if isinstance(batch_outs, list): + if step == 0: + for _ in enumerate(batch_outs): + outs.append(0.) + for i, batch_out in enumerate(batch_outs): + outs[i] += batch_out + else: + if step == 0: outs.append(0.) - for i, batch_out in enumerate(batch_outs): - outs[i] += batch_out * len(batch_ids) - else: - if batch_index == 0: - outs.append(0.) - outs[0] += batch_outs * len(batch_ids) - + outs[0] += batch_outs + if verbose == 1: + progbar.update(step) + for i in range(len(outs)): + outs[i] /= steps + else: if verbose == 1: - progbar.update(batch_end) - for i in range(len(outs)): - outs[i] /= samples + progbar = Progbar(target=num_samples) + batches = _make_batches(num_samples, batch_size) + index_array = np.arange(num_samples) + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + if isinstance(ins[-1], float): + # Do not slice the training phase flag. + ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + else: + ins_batch = _slice_arrays(ins, batch_ids) + + batch_outs = f(ins_batch) + if isinstance(batch_outs, list): + if batch_index == 0: + for batch_out in enumerate(batch_outs): + outs.append(0.) + for i, batch_out in enumerate(batch_outs): + outs[i] += batch_out * len(batch_ids) + else: + if batch_index == 0: + outs.append(0.) + outs[0] += batch_outs * len(batch_ids) + + if verbose == 1: + progbar.update(batch_end) + for i in range(len(outs)): + outs[i] /= num_samples if len(outs) == 1: return outs[0] return outs @@ -1285,7 +1436,7 @@ class Model(Container): def fit(self, x=None, y=None, - batch_size=32, + batch_size=None, epochs=1, verbose=1, callbacks=None, @@ -1294,7 +1445,9 @@ class Model(Container): shuffle=True, class_weight=None, sample_weight=None, - initial_epoch=0): + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None): """Trains the model for a fixed number of epochs (iterations on a dataset). Arguments: @@ -1308,42 +1461,54 @@ class Model(Container): If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. - batch_size: integer. Number of samples per gradient update. - epochs: integer, the number of times to iterate + batch_size: Integer or `None`. + Number of samples per gradient update. + If unspecified, it will default to 32. + epochs: Integer, the number of times to iterate over the training data arrays. verbose: 0, 1, or 2. Verbosity mode. 0 = silent, 1 = verbose, 2 = one log line per epoch. - callbacks: list of callbacks to be called during training. + callbacks: List of callbacks to be called during training. See [callbacks](/callbacks). - validation_split: float between 0 and 1: + validation_split: Float between 0 and 1: fraction of the training data to be used as validation data. The model will set apart this fraction of the training data, will not train on it, and will evaluate the loss and any model metrics on this data at the end of each epoch. - validation_data: data on which to evaluate + validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. This could be a tuple (x_val, y_val) or a tuple (x_val, y_val, val_sample_weights). - shuffle: boolean, whether to shuffle the training data - before each epoch. - class_weight: optional dictionary mapping + shuffle: Boolean, whether to shuffle the training data + before each epoch. Has no effect when `steps_per_epoch` + is not `None`. + class_weight: Optional dictionary mapping class indices (integers) to a weight (float) to apply to the model's loss for the samples from this class during training. This can be useful to tell the model to "pay more attention" to samples from an under-represented class. - sample_weight: optional array of the same length as x, containing + sample_weight: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). - initial_epoch: epoch at which to start training + initial_epoch: Epoch at which to start training (useful for resuming a previous training run) + steps_per_epoch: Total number of steps (batches of samples) + before declaring one epoch finished and starting the + next epoch. When training with Input Tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of unique samples in your dataset divided by + the batch size, or 1 if that cannot be determined. + validation_steps: Only relevant if `steps_per_epoch` + is specified. Total number of steps (batches of samples) + to validate before stopping. Returns: A `History` instance. Its `history` attribute contains @@ -1353,7 +1518,13 @@ class Model(Container): ValueError: In case of mismatch between the provided input data and what the model expects. """ - + # Backwards compatibility + if batch_size is None and steps_per_epoch is None: + batch_size = 32 + if x is None and y is None and steps_per_epoch is None: + raise ValueError('If fitting from data tensors, ' + 'you should specify the `steps_per_epoch` ' + 'argument.') # Validate user data. x, y, sample_weights = self._standardize_user_data( x, @@ -1362,7 +1533,10 @@ class Model(Container): class_weight=class_weight, check_batch_axis=False, batch_size=batch_size) + # Prepare validation data. + do_validation = False + val_ins = [] if validation_data: do_validation = True if len(validation_data) == 2: @@ -1383,8 +1557,6 @@ class Model(Container): sample_weight=val_sample_weight, check_batch_axis=False, batch_size=batch_size) - self._make_test_function() - val_f = self.test_function if self.uses_learning_phase and not isinstance(K.learning_phase(), int): val_ins = val_x + val_y + val_sample_weights + [0.] else: @@ -1400,16 +1572,15 @@ class Model(Container): y, val_y = (_slice_arrays(y, 0, split_at), _slice_arrays(y, split_at)) sample_weights, val_sample_weights = (_slice_arrays( sample_weights, 0, split_at), _slice_arrays(sample_weights, split_at)) - self._make_test_function() - val_f = self.test_function if self.uses_learning_phase and not isinstance(K.learning_phase(), int): val_ins = val_x + val_y + val_sample_weights + [0.] else: val_ins = val_x + val_y + val_sample_weights - else: - do_validation = False - val_f = None - val_ins = None + + elif validation_steps: + do_validation = True + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + val_ins = [0.] # Prepare input arrays and training function. if self.uses_learning_phase and not isinstance(K.learning_phase(), int): @@ -1423,10 +1594,13 @@ class Model(Container): out_labels = self._get_deduped_metrics_names() if do_validation: + self._make_test_function() + val_f = self.test_function callback_metrics = copy.copy(out_labels) + [ 'val_' + n for n in out_labels ] else: + val_f = None callback_metrics = copy.copy(out_labels) # Delegate logic to `_fit_loop`. @@ -1442,9 +1616,17 @@ class Model(Container): val_ins=val_ins, shuffle=shuffle, callback_metrics=callback_metrics, - initial_epoch=initial_epoch) - - def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None): + initial_epoch=initial_epoch, + steps_per_epoch=steps_per_epoch, + validation_steps=validation_steps) + + def evaluate(self, + x, + y, + batch_size=None, + verbose=1, + sample_weight=None, + steps=None): """Returns the loss value & metrics values for the model in test mode. Computation is done in batches. @@ -1460,17 +1642,30 @@ class Model(Container): If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. - batch_size: integer. Number of samples per gradient update. - verbose: verbosity mode, 0 or 1. + batch_size: Integer. If unspecified, it will default to 32. + verbose: Verbosity mode, 0 or 1. sample_weight: Array of weights to weight the contribution of different samples to the loss and metrics. + steps: Total number of steps (batches of samples) + before declaring the evaluation round finished. + Ignored with the default value of `None`. Returns: Scalar test loss (if the model has a single output and no metrics) or list of scalars (if the model has multiple outputs and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. + + Raises: + ValueError: In case of invalid argument values. """ + # Backwards compatibility. + if batch_size is None and steps is None: + batch_size = 32 + if x is None and y is None and steps is None: + raise ValueError('If evaluating from data tensors, ' + 'you should specify the `steps` ' + 'argument.') # Validate user data. x, y, sample_weights = self._standardize_user_data( x, @@ -1485,18 +1680,22 @@ class Model(Container): ins = x + y + sample_weights self._make_test_function() f = self.test_function - return self._test_loop(f, ins, batch_size=batch_size, verbose=verbose) + return self._test_loop( + f, ins, batch_size=batch_size, verbose=verbose, steps=steps) - def predict(self, x, batch_size=32, verbose=0): + def predict(self, x, batch_size=None, verbose=0, steps=None): """Generates output predictions for the input samples. Computation is done in batches. Arguments: - x: the input data, as a Numpy array + x: The input data, as a Numpy array (or list of Numpy arrays if the model has multiple outputs). - batch_size: integer. - verbose: verbosity mode, 0 or 1. + batch_size: Integer. If unspecified, it will default to 32. + verbose: Verbosity mode, 0 or 1. + steps: Total number of steps (batches of samples) + before declaring the prediction round finished. + Ignored with the default value of `None`. Returns: Numpy array(s) of predictions. @@ -1507,6 +1706,13 @@ class Model(Container): or in case a stateful model receives a number of samples that is not a multiple of the batch size. """ + # Backwards compatibility. + if batch_size is None and steps is None: + batch_size = 32 + if x is None and steps is None: + raise ValueError('If predicting from data tensors, ' + 'you should specify the `steps` ' + 'argument.') # Validate user data. x = _standardize_input_data( x, @@ -1529,7 +1735,8 @@ class Model(Container): ins = x self._make_predict_function() f = self.predict_function - return self._predict_loop(f, ins, batch_size=batch_size, verbose=verbose) + return self._predict_loop( + f, ins, batch_size=batch_size, verbose=verbose, steps=steps) def train_on_batch(self, x, y, sample_weight=None, class_weight=None): """Runs a single gradient update on a single batch of data. @@ -1545,14 +1752,14 @@ class Model(Container): If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. - sample_weight: optional array of the same length as x, containing + sample_weight: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), to apply a different weight to every timestep of every sample. In this case you should make sure to specify sample_weight_mode="temporal" in compile(). - class_weight: optional dictionary mapping + class_weight: Optional dictionary mapping class indices (integers) to a weight (float) to apply to the model's loss for the samples from this class during training. @@ -1596,7 +1803,7 @@ class Model(Container): If all outputs in the model are named, you can also pass a dictionary mapping output names to Numpy arrays. - sample_weight: optional array of the same length as x, containing + sample_weight: Optional array of the same length as x, containing weights to apply to the model's loss for each sample. In the case of temporal data, you can pass a 2D array with shape (samples, sequence_length), @@ -1655,6 +1862,7 @@ class Model(Container): max_queue_size=10, workers=1, use_multiprocessing=False, + shuffle=True, initial_epoch=0, **kwargs): """Fits the model on data yielded batch-by-batch by a Python generator. @@ -1668,7 +1876,7 @@ class Model(Container): using `use_multiprocessing=True`. Arguments: - generator: a generator or an instance of Sequence (keras.utils.Sequence) + generator: A generator or an instance of Sequence (keras.utils.Sequence) object in order to avoid duplicate data when using multiprocessing. The output of the generator must be either @@ -1683,29 +1891,32 @@ class Model(Container): finished and starting the next epoch. It should typically be equal to the number of unique samples if your dataset divided by the batch size. - epochs: integer, total number of iterations on the data. - verbose: verbosity mode, 0, 1, or 2. - callbacks: list of callbacks to be called during training. - validation_data: this can be either + epochs: Integer, total number of iterations on the data. + verbose: Verbosity mode, 0, 1, or 2. + callbacks: List of callbacks to be called during training. + validation_data: This can be either - a generator for the validation data - a tuple (inputs, targets) - a tuple (inputs, targets, sample_weights). validation_steps: Only relevant if `validation_data` is a generator. Total number of steps (batches of samples) to yield from `generator` before stopping. - class_weight: dictionary mapping class indices to a weight + class_weight: Dictionary mapping class indices to a weight for the class. - max_queue_size: maximum size for the generator queue - workers: maximum number of processes to spin up + max_queue_size: Maximum size for the generator queue + workers: Maximum number of processes to spin up when using process based threading - use_multiprocessing: if True, use process based threading. + use_multiprocessing: If True, use process based threading. Note that because this implementation relies on multiprocessing, you should not pass non picklable arguments to the generator as they can't be passed easily to children processes. - initial_epoch: epoch at which to start training + shuffle: Whether to shuffle the data at the beginning of each + epoch. Only used with instances of `Sequence` ( + keras.utils.Sequence). + initial_epoch: Epoch at which to start training (useful for resuming a previous training run) **kwargs: support for legacy arguments. @@ -1733,7 +1944,7 @@ class Model(Container): ValueError: In case the generator yields data in an invalid format. """ - # Legacy support + # Legacy support if 'max_q_size' in kwargs: max_queue_size = kwargs.pop('max_q_size') logging.warning('The argument `max_q_size` has been renamed ' @@ -1810,15 +2021,16 @@ class Model(Container): is_sequence = isinstance(generator, Sequence) if not is_sequence and use_multiprocessing and workers > 1: logging.warning( - 'Using a generator with `use_multiprocessing=True`' - ' may duplicate your data.Please consider using ' - 'the `keras.utils.Sequence` class.') + logging.warning('Using a generator with `use_multiprocessing=True`' + ' and multiple workers may duplicate your data.' + ' Please consider using the`keras.utils.Sequence' + ' class.')) enqueuer = None try: if is_sequence: enqueuer = OrderedEnqueuer( - generator, use_multiprocessing=use_multiprocessing) + generator, use_multiprocessing=use_multiprocessing, shuffle=shuffle) else: enqueuer = GeneratorEnqueuer( generator, @@ -1899,6 +2111,9 @@ class Model(Container): for l, o in zip(out_labels, val_outs): epoch_logs['val_' + l] = o + if callback_model.stop_training: + break + callbacks.on_epoch_end(epoch, epoch_logs) epoch += 1 if callback_model.stop_training: @@ -1975,9 +2190,10 @@ class Model(Container): is_sequence = isinstance(generator, Sequence) if not is_sequence and use_multiprocessing and workers > 1: logging.warning( - 'Using a generator with `use_multiprocessing=True`' - ' may duplicate your data.Please consider using ' - 'the `keras.utils.Sequence` class.') + logging.warning('Using a generator with `use_multiprocessing=True`' + ' and multiple workers may duplicate your data.' + ' Please consider using the`keras.utils.Sequence' + ' class.')) enqueuer = None try: @@ -2086,8 +2302,6 @@ class Model(Container): logging.warning('The argument `pickle_safe` has been renamed ' '`use_multiprocessing`. ' 'Update your method calls accordingly.') - if kwargs: - raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) self._make_predict_function() @@ -2097,9 +2311,10 @@ class Model(Container): is_sequence = isinstance(generator, Sequence) if not is_sequence and use_multiprocessing and workers > 1: logging.warning( - 'Using a generator with `use_multiprocessing=True`' - ' may duplicate your data.Please consider using ' - 'the `keras.utils.Sequence` class.') + logging.warning('Using a generator with `use_multiprocessing=True`' + ' and multiple workers may duplicate your data.' + ' Please consider using the`keras.utils.Sequence' + ' class.')) enqueuer = None try: diff --git a/tensorflow/contrib/keras/python/keras/engine/training_test.py b/tensorflow/contrib/keras/python/keras/engine/training_test.py index ad6812ddaf..30cdba96e4 100644 --- a/tensorflow/contrib/keras/python/keras/engine/training_test.py +++ b/tensorflow/contrib/keras/python/keras/engine/training_test.py @@ -305,7 +305,7 @@ class TrainingTest(test.TestCase): optimizer='rmsprop', metrics=set(0)) - with self.assertRaises(RuntimeError): + with self.assertRaises(ValueError): model.compile(loss=None, optimizer='rmsprop') @@ -981,5 +981,443 @@ class TestTrainingUtils(test.TestCase): keras.engine.training._slice_arrays(input_a, stop=2) +class TestTrainingWithDataTensors(test.TestCase): + + def test_model_with_input_feed_tensor(self): + """We test building a model with a TF variable as input. + + We should be able to call fit, evaluate, predict, + by only passing them data for the placeholder inputs + in the model. + """ + with self.test_session(): + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + + output_a_np = np.random.random((10, 4)) + output_b_np = np.random.random((10, 3)) + + a = keras.Input( + tensor=keras.backend.variables_module.Variable(input_a_np, + dtype='float32')) + b = keras.Input(shape=(3,), name='input_b') + + a_2 = keras.layers.Dense(4, name='dense_1')(a) + dp = keras.layers.Dropout(0.5, name='dropout') + b_2 = dp(b) + + model = keras.models.Model([a, b], [a_2, b_2]) + model.summary() + + optimizer = 'rmsprop' + loss = 'mse' + loss_weights = [1., 0.5] + model.compile(optimizer, loss, metrics=['mean_squared_error'], + loss_weights=loss_weights, + sample_weight_mode=None) + + # test train_on_batch + out = model.train_on_batch(input_b_np, + [output_a_np, output_b_np]) + out = model.train_on_batch({'input_b': input_b_np}, + [output_a_np, output_b_np]) + out = model.test_on_batch({'input_b': input_b_np}, + [output_a_np, output_b_np]) + out = model.predict_on_batch({'input_b': input_b_np}) + + # test fit + out = model.fit({'input_b': input_b_np}, + [output_a_np, output_b_np], epochs=1, batch_size=10) + out = model.fit(input_b_np, + [output_a_np, output_b_np], epochs=1, batch_size=10) + + # test evaluate + out = model.evaluate({'input_b': input_b_np}, + [output_a_np, output_b_np], batch_size=10) + out = model.evaluate(input_b_np, + [output_a_np, output_b_np], batch_size=10) + + # test predict + out = model.predict({'input_b': input_b_np}, batch_size=10) + out = model.predict(input_b_np, batch_size=10) + self.assertEqual(len(out), 2) + + # Now test a model with a single input + # i.e. we don't pass any data to fit the model. + a = keras.Input( + tensor=keras.backend.variables_module.Variable(input_a_np, + dtype='float32')) + a_2 = keras.layers.Dense(4, name='dense_1')(a) + a_2 = keras.layers.Dropout(0.5, name='dropout')(a_2) + model = keras.models.Model(a, a_2) + model.summary() + + optimizer = 'rmsprop' + loss = 'mse' + model.compile(optimizer, loss, metrics=['mean_squared_error']) + + # test train_on_batch + out = model.train_on_batch(None, + output_a_np) + out = model.train_on_batch(None, + output_a_np) + out = model.test_on_batch(None, + output_a_np) + out = model.predict_on_batch(None) + out = model.train_on_batch([], + output_a_np) + out = model.train_on_batch({}, + output_a_np) + + # test fit + out = model.fit(None, + output_a_np, epochs=1, batch_size=10) + out = model.fit(None, + output_a_np, epochs=1, batch_size=10) + + # test evaluate + out = model.evaluate(None, + output_a_np, batch_size=10) + out = model.evaluate(None, + output_a_np, batch_size=10) + + # test predict + out = model.predict(None, steps=3) + out = model.predict(None, steps=3) + self.assertEqual(out.shape, (10 * 3, 4)) + + # Same, without learning phase + # i.e. we don't pass any data to fit the model. + a = keras.Input( + tensor=keras.backend.variables_module.Variable(input_a_np, + dtype='float32')) + a_2 = keras.layers.Dense(4, name='dense_1')(a) + model = keras.models.Model(a, a_2) + model.summary() + + optimizer = 'rmsprop' + loss = 'mse' + model.compile(optimizer, loss, metrics=['mean_squared_error']) + + # test train_on_batch + out = model.train_on_batch(None, + output_a_np) + out = model.train_on_batch(None, + output_a_np) + out = model.test_on_batch(None, + output_a_np) + out = model.predict_on_batch(None) + out = model.train_on_batch([], + output_a_np) + out = model.train_on_batch({}, + output_a_np) + + # test fit + out = model.fit(None, + output_a_np, epochs=1, batch_size=10) + out = model.fit(None, + output_a_np, epochs=1, batch_size=10) + + # test evaluate + out = model.evaluate(None, + output_a_np, batch_size=10) + out = model.evaluate(None, + output_a_np, batch_size=10) + + # test predict + out = model.predict(None, steps=3) + out = model.predict(None, steps=3) + self.assertEqual(out.shape, (10 * 3, 4)) + + def test_model_with_partial_loss(self): + with self.test_session(): + a = keras.Input(shape=(3,), name='input_a') + a_2 = keras.layers.Dense(4, name='dense_1')(a) + dp = keras.layers.Dropout(0.5, name='dropout') + a_3 = dp(a_2) + model = keras.models.Model(a, [a_2, a_3]) + + optimizer = 'rmsprop' + loss = {'dropout': 'mse'} + model.compile(optimizer, loss, metrics=['mae']) + + input_a_np = np.random.random((10, 3)) + output_a_np = np.random.random((10, 4)) + + # test train_on_batch + _ = model.train_on_batch(input_a_np, output_a_np) + _ = model.test_on_batch(input_a_np, output_a_np) + # fit + _ = model.fit(input_a_np, [output_a_np]) + # evaluate + _ = model.evaluate(input_a_np, [output_a_np]) + + # Same without dropout. + a = keras.Input(shape=(3,), name='input_a') + a_2 = keras.layers.Dense(4, name='dense_1')(a) + a_3 = keras.layers.Dense(4, name='dense_2')(a_2) + model = keras.models.Model(a, [a_2, a_3]) + + optimizer = 'rmsprop' + loss = {'dense_2': 'mse'} + model.compile(optimizer, loss, metrics={'dense_1': 'mae'}) + + # test train_on_batch + _ = model.train_on_batch(input_a_np, output_a_np) + _ = model.test_on_batch(input_a_np, output_a_np) + # fit + _ = model.fit(input_a_np, [output_a_np]) + # evaluate + _ = model.evaluate(input_a_np, [output_a_np]) + + def test_model_with_external_loss(self): + with self.test_session(): + # None loss, only regularization loss. + a = keras.Input(shape=(3,), name='input_a') + a_2 = keras.layers.Dense(4, name='dense_1', + kernel_regularizer='l1', + bias_regularizer='l2')(a) + dp = keras.layers.Dropout(0.5, name='dropout') + a_3 = dp(a_2) + + model = keras.models.Model(a, [a_2, a_3]) + + optimizer = 'rmsprop' + loss = None + model.compile(optimizer, loss, metrics=['mae']) + + input_a_np = np.random.random((10, 3)) + + # test train_on_batch + out = model.train_on_batch(input_a_np, None) + out = model.test_on_batch(input_a_np, None) + # fit + out = model.fit(input_a_np, None) + # evaluate + out = model.evaluate(input_a_np, None) + + # No dropout, external loss. + a = keras.Input(shape=(3,), name='input_a') + a_2 = keras.layers.Dense(4, name='dense_1')(a) + a_3 = keras.layers.Dense(4, name='dense_2')(a) + + model = keras.models.Model(a, [a_2, a_3]) + model.add_loss(keras.backend.mean(a_3 + a_2)) + + optimizer = 'rmsprop' + loss = None + model.compile(optimizer, loss, metrics=['mae']) + + # test train_on_batch + out = model.train_on_batch(input_a_np, None) + out = model.test_on_batch(input_a_np, None) + # fit + out = model.fit(input_a_np, None) + # evaluate + out = model.evaluate(input_a_np, None) + + # Test model with no external data at all. + a = keras.Input( + tensor=keras.backend.variables_module.Variable(input_a_np, + dtype='float32')) + a_2 = keras.layers.Dense(4, name='dense_1')(a) + a_2 = keras.layers.Dropout(0.5, name='dropout')(a_2) + model = keras.models.Model(a, a_2) + model.add_loss(keras.backend.mean(a_2)) + + model.compile(optimizer='rmsprop', + loss=None, + metrics=['mean_squared_error']) + + # test train_on_batch + out = model.train_on_batch(None, None) + out = model.test_on_batch(None, None) + out = model.predict_on_batch(None) + + # test fit + with self.assertRaises(ValueError): + out = model.fit(None, None, epochs=1, batch_size=10) + out = model.fit(None, None, epochs=1, steps_per_epoch=1) + + # test fit with validation data + with self.assertRaises(ValueError): + out = model.fit(None, None, epochs=1, + steps_per_epoch=None, + validation_steps=2) + out = model.fit(None, None, epochs=1, + steps_per_epoch=2, + validation_steps=2) + + # test evaluate + with self.assertRaises(ValueError): + out = model.evaluate(None, None, batch_size=10) + out = model.evaluate(None, None, steps=3) + + # test predict + with self.assertRaises(ValueError): + out = model.predict(None, batch_size=10) + out = model.predict(None, steps=3) + self.assertEqual(out.shape, (10 * 3, 4)) + + # Test multi-output model with no external data at all. + a = keras.Input( + tensor=keras.backend.variables_module.Variable(input_a_np, + dtype='float32')) + a_1 = keras.layers.Dense(4, name='dense_1')(a) + a_2 = keras.layers.Dropout(0.5, name='dropout')(a_1) + model = keras.models.Model(a, [a_1, a_2]) + model.add_loss(keras.backend.mean(a_2)) + + model.compile(optimizer='rmsprop', + loss=None, + metrics=['mean_squared_error']) + + # test train_on_batch + out = model.train_on_batch(None, None) + out = model.test_on_batch(None, None) + out = model.predict_on_batch(None) + + # test fit + with self.assertRaises(ValueError): + out = model.fit(None, None, epochs=1, batch_size=10) + out = model.fit(None, None, epochs=1, steps_per_epoch=1) + + # test fit with validation data + out = model.fit(None, None, epochs=1, + steps_per_epoch=2, + validation_steps=2) + + # test evaluate + with self.assertRaises(ValueError): + out = model.evaluate(None, None, batch_size=10) + out = model.evaluate(None, None, steps=3) + + # test predict + with self.assertRaises(ValueError): + out = model.predict(None, batch_size=10, verbose=1) + out = model.predict(None, steps=3) + self.assertEqual(len(out), 2) + self.assertEqual(out[0].shape, (10 * 3, 4)) + self.assertEqual(out[1].shape, (10 * 3, 4)) + + def test_target_tensors(self): + with self.test_session(): + # single-output, as list + model = keras.models.Sequential() + model.add(keras.layers.Dense(4, input_shape=(4,), name='dense')) + input_val = np.random.random((10, 4)) + target_val = np.random.random((10, 4)) + target = keras.backend.variable(target_val) + model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target]) + model.train_on_batch(input_val, None) + + # single-output, as dict + model.compile(optimizer='rmsprop', loss='mse', + target_tensors={'dense': target}) + model.train_on_batch(input_val, None) + + # test invalid arguments + with self.assertRaises(TypeError): + model.compile(optimizer='rmsprop', loss='mse', + target_tensors=set()) + with self.assertRaises(ValueError): + model.compile(optimizer='rmsprop', loss='mse', + target_tensors=[target, target]) + with self.assertRaises(ValueError): + model.compile(optimizer='rmsprop', loss='mse', + target_tensors={'dense2': None}) + with self.assertRaises(ValueError): + model.compile(optimizer='rmsprop', loss='mse', + target_tensors=[target]) + model.train_on_batch(input_val, target_val) + + # multi-output, as list + input_val = np.random.random((10, 4)) + target_val_a = np.random.random((10, 4)) + target_val_b = np.random.random((10, 4)) + target_a = keras.backend.variable(target_val_a) + target_b = keras.backend.variable(target_val_b) + + inputs = keras.layers.Input(shape=(4,)) + output_a = keras.layers.Dense(4, name='dense_a')(inputs) + output_b = keras.layers.Dense(4, name='dense_b')(inputs) + model = keras.models.Model(inputs, [output_a, output_b]) + model.compile(optimizer='rmsprop', loss='mse', + target_tensors=[target_a, target_b]) + model.train_on_batch(input_val, None) + + # multi-output, as dict + model.compile(optimizer='rmsprop', loss='mse', + target_tensors={'dense_a': target_a, + 'dense_b': target_b}) + model.train_on_batch(input_val, None) + + # test with sample weights + model.compile(optimizer='rmsprop', loss='mse', + target_tensors=[target_a, target_b]) + model.train_on_batch(input_val, None, + sample_weight={'dense_a': np.random.random((10,))}) + + def test_model_custom_target_tensors(self): + with self.test_session(): + a = keras.Input(shape=(3,), name='input_a') + b = keras.Input(shape=(3,), name='input_b') + + a_2 = keras.layers.Dense(4, name='dense_1')(a) + dp = keras.layers.Dropout(0.5, name='dropout') + b_2 = dp(b) + + y = keras.backend.placeholder([10, 4], name='y') + y1 = keras.backend.placeholder([10, 3], name='y1') + y2 = keras.backend.placeholder([7, 5], name='y2') + model = keras.models.Model([a, b], [a_2, b_2]) + + optimizer = 'rmsprop' + loss = 'mse' + loss_weights = [1., 0.5] + + # test list of target tensors + with self.assertRaises(ValueError): + model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, + sample_weight_mode=None, target_tensors=[y, y1, y2]) + model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, + sample_weight_mode=None, target_tensors=[y, y1]) + input_a_np = np.random.random((10, 3)) + input_b_np = np.random.random((10, 3)) + + output_a_np = np.random.random((10, 4)) + output_b_np = np.random.random((10, 3)) + + _ = model.train_on_batch([input_a_np, input_b_np], + [output_a_np, output_b_np], + {y: np.random.random((10, 4)), + y1: np.random.random((10, 3))}) + # test dictionary of target_tensors + with self.assertRaises(ValueError): + model.compile(optimizer, loss, + metrics=[], + loss_weights=loss_weights, + sample_weight_mode=None, + target_tensors={'does_not_exist': y2}) + # test dictionary of target_tensors + model.compile(optimizer, loss, + metrics=[], + loss_weights=loss_weights, + sample_weight_mode=None, + target_tensors={'dense_1': y, 'dropout': y1}) + _ = model.train_on_batch([input_a_np, input_b_np], + [output_a_np, output_b_np], + {y: np.random.random((10, 4)), + y1: np.random.random((10, 3))}) + + # test with custom TF placeholder as target + pl_target_a = keras.backend.array_ops.placeholder('float32', + shape=(None, 4)) + model.compile(optimizer='rmsprop', loss='mse', + target_tensors={'dense_1': pl_target_a}) + model.train_on_batch([input_a_np, input_b_np], + [output_a_np, output_b_np]) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/keras/python/keras/layers/convolutional.py b/tensorflow/contrib/keras/python/keras/layers/convolutional.py index 9174f6df16..9eda94c1df 100644 --- a/tensorflow/contrib/keras/python/keras/layers/convolutional.py +++ b/tensorflow/contrib/keras/python/keras/layers/convolutional.py @@ -763,7 +763,7 @@ class SeparableConv2D(tf_convolutional_layers.SeparableConv2D, Layer): depthwise_regularizer: Regularizer function applied to the depthwise kernel matrix. pointwise_regularizer: Regularizer function applied to - the depthwise kernel matrix. + the pointwise kernel matrix. bias_regularizer: Regularizer function applied to the bias vector. activity_regularizer: Regularizer function applied to the output of the layer (its "activation").. @@ -1473,14 +1473,14 @@ class Cropping3D(Layer): spatial or spatio-temporal). Arguments: - cropping: int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. + cropping: int, or tuple of 23ints, or tuple of 3 tuples of 2 ints. - If int: the same symmetric cropping - is applied to width and height. - - If tuple of 2 ints: + is applied to depth, height, and width. + - If tuple of 3 ints: interpreted as two different - symmetric cropping values for height and width: + symmetric cropping values for depth, height, and width: `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`. - - If tuple of 2 tuples of 2 ints: + - If tuple of 3 tuples of 2 ints: interpreted as `((left_dim1_crop, right_dim1_crop), (left_dim2_crop, right_dim2_crop), (left_dim3_crop, right_dim3_crop))` diff --git a/tensorflow/contrib/keras/python/keras/layers/convolutional_recurrent.py b/tensorflow/contrib/keras/python/keras/layers/convolutional_recurrent.py index 9ab2e72bf1..ed5aea2e57 100644 --- a/tensorflow/contrib/keras/python/keras/layers/convolutional_recurrent.py +++ b/tensorflow/contrib/keras/python/keras/layers/convolutional_recurrent.py @@ -151,18 +151,30 @@ class ConvRecurrent2D(Recurrent): dilation=self.dilation_rate[1]) if self.return_sequences: if self.data_format == 'channels_first': - return tensor_shape.TensorShape( - [input_shape[0], input_shape[1], self.filters, rows, cols]) + output_shape = [input_shape[0], input_shape[1], + self.filters, rows, cols] elif self.data_format == 'channels_last': - return tensor_shape.TensorShape( - [input_shape[0], input_shape[1], rows, cols, self.filters]) + output_shape = [input_shape[0], input_shape[1], + rows, cols, self.filters] else: if self.data_format == 'channels_first': - return tensor_shape.TensorShape( - [input_shape[0], self.filters, rows, cols]) + output_shape = [input_shape[0], self.filters, rows, cols] elif self.data_format == 'channels_last': - return tensor_shape.TensorShape( - [input_shape[0], rows, cols, self.filters]) + output_shape = [input_shape[0], rows, cols, self.filters] + + if self.return_state: + if self.data_format == 'channels_first': + output_shapes = [output_shape] + [(input_shape[0], + self.filters, + rows, + cols) for _ in range(2)] + elif self.data_format == 'channels_last': + output_shapes = [output_shape] + [(input_shape[0], + rows, + cols, + self.filters) for _ in range(2)] + return [tensor_shape.TensorShape(shape) for shape in output_shapes] + return tensor_shape.TensorShape(output_shape) def get_config(self): config = { @@ -447,7 +459,6 @@ class ConvLSTM2D(ConvRecurrent2D): if not self.stateful: raise RuntimeError('Layer must be stateful.') input_shape = self.input_spec[0].shape - output_shape = self._compute_output_shape(input_shape) if not input_shape[0]: raise ValueError('If a RNN is stateful, a complete ' @@ -455,20 +466,24 @@ class ConvLSTM2D(ConvRecurrent2D): '(including batch size). ' 'Got input shape: ' + str(input_shape)) + if self.return_state: + output_shape = tuple(self._compute_output_shape(input_shape)[0].as_list()) + else: + output_shape = tuple(self._compute_output_shape(input_shape).as_list()) if self.return_sequences: - out_row, out_col, out_filter = output_shape[2:] + output_shape = (input_shape[0],) + output_shape[2:] else: - out_row, out_col, out_filter = output_shape[1:] + output_shape = (input_shape[0],) + output_shape[1:] if hasattr(self, 'states'): K.set_value(self.states[0], - np.zeros((input_shape[0], out_row, out_col, out_filter))) + np.zeros(output_shape)) K.set_value(self.states[1], - np.zeros((input_shape[0], out_row, out_col, out_filter))) + np.zeros(output_shape)) else: self.states = [ - K.zeros((input_shape[0], out_row, out_col, out_filter)), - K.zeros((input_shape[0], out_row, out_col, out_filter)) + K.zeros(output_shape), + K.zeros(output_shape) ] def get_constants(self, inputs, training=None): diff --git a/tensorflow/contrib/keras/python/keras/layers/convolutional_recurrent_test.py b/tensorflow/contrib/keras/python/keras/layers/convolutional_recurrent_test.py index 06b2be6b68..1ce17f0c31 100644 --- a/tensorflow/contrib/keras/python/keras/layers/convolutional_recurrent_test.py +++ b/tensorflow/contrib/keras/python/keras/layers/convolutional_recurrent_test.py @@ -47,8 +47,27 @@ class ConvLSTMTest(test.TestCase): input_channel) for return_sequences in [True, False]: - # test for output shape: with self.test_session(): + # test for return state: + x = keras.Input(batch_shape=inputs.shape) + kwargs = {'data_format': data_format, + 'return_sequences': return_sequences, + 'return_state': True, + 'stateful': True, + 'filters': filters, + 'kernel_size': (num_row, num_col), + 'padding': 'valid'} + layer = keras.layers.ConvLSTM2D(**kwargs) + layer.build(inputs.shape) + outputs = layer(x) + _, states = outputs[0], outputs[1:] + self.assertEqual(len(states), 2) + model = keras.models.Model(x, states[0]) + state = model.predict(inputs) + self.assertAllClose( + keras.backend.eval(layer.states[0]), state, atol=1e-4) + + # test for output shape: testing_utils.layer_test( keras.layers.ConvLSTM2D, kwargs={'data_format': data_format, diff --git a/tensorflow/contrib/keras/python/keras/layers/core.py b/tensorflow/contrib/keras/python/keras/layers/core.py index c3df1c85d7..e5df0c5800 100644 --- a/tensorflow/contrib/keras/python/keras/layers/core.py +++ b/tensorflow/contrib/keras/python/keras/layers/core.py @@ -77,7 +77,7 @@ class Masking(Layer): def call(self, inputs): boolean_mask = K.any( K.not_equal(inputs, self.mask_value), axis=-1, keepdims=True) - return inputs * K.cast(boolean_mask, K.floatx()) + return inputs * K.cast(boolean_mask, inputs.dtype) def get_config(self): config = {'mask_value': self.mask_value} @@ -107,7 +107,10 @@ class Dropout(tf_core_layers.Dropout, Layer): self.supports_masking = True # Inheritance call order: # 1) tf.layers.Dropout, 2) keras.layers.Layer, 3) tf.layers.Layer - super(Dropout, self).__init__(rate=rate, noise_shape=noise_shape, seed=seed, **kwargs) + super(Dropout, self).__init__(rate=rate, + noise_shape=noise_shape, + seed=seed, + **kwargs) def call(self, inputs, training=None): if training is None: @@ -349,7 +352,7 @@ class Reshape(Layer): The new output shape with a -1 replaced with its computed value. Raises a ValueError if the total array size of the output_shape is - different then the input_shape, or more then one unknown dimension + different then the input_shape, or more than one unknown dimension is specified. Raises: diff --git a/tensorflow/contrib/keras/python/keras/layers/lstm_test.py b/tensorflow/contrib/keras/python/keras/layers/lstm_test.py index 7858b0e6b5..d39ea90523 100644 --- a/tensorflow/contrib/keras/python/keras/layers/lstm_test.py +++ b/tensorflow/contrib/keras/python/keras/layers/lstm_test.py @@ -337,6 +337,33 @@ class LSTMLayerTest(test.TestCase): inputs = np.random.random((num_samples, timesteps, embedding_dim)) outputs = model.predict(inputs) + def test_initial_states_as_other_inputs(self): + timesteps = 3 + embedding_dim = 4 + units = 3 + num_samples = 2 + num_states = 2 + layer_class = keras.layers.LSTM + + with self.test_session(): + # Test with Keras tensor + main_inputs = keras.Input((timesteps, embedding_dim)) + initial_state = [keras.Input((units,)) for _ in range(num_states)] + inputs = [main_inputs] + initial_state + + layer = layer_class(units) + output = layer(inputs) + assert initial_state[0] in layer.inbound_nodes[0].input_tensors + + model = keras.models.Model(inputs, output) + model.compile(loss='categorical_crossentropy', optimizer='adam') + + main_inputs = np.random.random((num_samples, timesteps, embedding_dim)) + initial_state = [np.random.random((num_samples, units)) + for _ in range(num_states)] + targets = np.random.random((num_samples, units)) + model.train_on_batch([main_inputs] + initial_state, targets) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/keras/python/keras/layers/merge.py b/tensorflow/contrib/keras/python/keras/layers/merge.py index 64d0c40e61..486036a156 100644 --- a/tensorflow/contrib/keras/python/keras/layers/merge.py +++ b/tensorflow/contrib/keras/python/keras/layers/merge.py @@ -223,6 +223,37 @@ class Add(_Merge): return output +class Subtract(_Merge): + """Layer that subtracts two inputs. + + It takes as input a list of tensors of size 2, + both of the same shape, and returns a single tensor, (inputs[0] - inputs[1]), + also of the same shape. + + Examples: + + ```python + import keras + + input1 = keras.layers.Input(shape=(16,)) + x1 = keras.layers.Dense(8, activation='relu')(input1) + input2 = keras.layers.Input(shape=(32,)) + x2 = keras.layers.Dense(8, activation='relu')(input2) + # Equivalent to subtracted = keras.layers.subtract([x1, x2]) + subtracted = keras.layers.Subtract()([x1, x2]) + + out = keras.layers.Dense(4)(subtracted) + model = keras.models.Model(inputs=[input1, input2], outputs=out) + ``` + """ + + def _merge_function(self, inputs): + if len(inputs) != 2: + raise ValueError('`Subtract` layer should be called ' + 'on exactly 2 inputs. Received: %s' % inputs) + return inputs[0] - inputs[1] + + class Multiply(_Merge): """Layer that multiplies (element-wise) a list of inputs. @@ -486,6 +517,34 @@ def add(inputs, **kwargs): return Add(**kwargs)(inputs) +def subtract(inputs, **kwargs): + """Functional interface to the `Subtract` layer. + + Arguments: + inputs: A list of input tensors (exactly 2). + **kwargs: Standard layer keyword arguments. + + Returns: + A tensor, the difference of the inputs. + + Examples: + + ```python + import keras + + input1 = keras.layers.Input(shape=(16,)) + x1 = keras.layers.Dense(8, activation='relu')(input1) + input2 = keras.layers.Input(shape=(32,)) + x2 = keras.layers.Dense(8, activation='relu')(input2) + subtracted = keras.layers.subtract([x1, x2]) + + out = keras.layers.Dense(4)(subtracted) + model = keras.models.Model(inputs=[input1, input2], outputs=out) + ``` + """ + return Subtract(**kwargs)(inputs) + + def multiply(inputs, **kwargs): """Functional interface to the `Multiply` layer. diff --git a/tensorflow/contrib/keras/python/keras/layers/merge_test.py b/tensorflow/contrib/keras/python/keras/layers/merge_test.py index 4a365c2c44..aca6728e2a 100644 --- a/tensorflow/contrib/keras/python/keras/layers/merge_test.py +++ b/tensorflow/contrib/keras/python/keras/layers/merge_test.py @@ -194,6 +194,20 @@ class MergeLayersTest(test.TestCase): dot = keras.layers.Dot(1) dot._compute_output_shape(1) + def test_merge_subtract(self): + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(4, 5)) + y = keras.layers.subtract([i1, i2]) + self.assertEqual(y.get_shape().as_list(), [None, 4, 5]) + + # Test invalid use cases + i1 = keras.layers.Input(shape=(4, 5)) + i2 = keras.layers.Input(shape=(3, 5)) + with self.assertRaises(ValueError): + keras.layers.subtract([i1, i2]) + with self.assertRaises(ValueError): + keras.layers.subtract([i1, i1, i1]) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/keras/python/keras/layers/recurrent.py b/tensorflow/contrib/keras/python/keras/layers/recurrent.py index 592e5f5e3a..988ddf54cc 100644 --- a/tensorflow/contrib/keras/python/keras/layers/recurrent.py +++ b/tensorflow/contrib/keras/python/keras/layers/recurrent.py @@ -279,6 +279,12 @@ class Recurrent(Layer): return inputs def __call__(self, inputs, initial_state=None, **kwargs): + if (isinstance(inputs, (list, tuple)) and + len(inputs) > 1 + and initial_state is None): + initial_state = inputs[1:] + inputs = inputs[0] + # If `initial_state` is specified, # and if it a Keras tensor, # then add it to the inputs and temporarily diff --git a/tensorflow/contrib/keras/python/keras/layers/wrappers.py b/tensorflow/contrib/keras/python/keras/layers/wrappers.py index aee02f432e..9defd6cd1c 100644 --- a/tensorflow/contrib/keras/python/keras/layers/wrappers.py +++ b/tensorflow/contrib/keras/python/keras/layers/wrappers.py @@ -26,6 +26,7 @@ from tensorflow.contrib.keras.python.keras.engine import InputSpec from tensorflow.contrib.keras.python.keras.engine import Layer from tensorflow.contrib.keras.python.keras.utils.generic_utils import has_arg from tensorflow.python.framework import tensor_shape +from tensorflow.python.layers import base as tf_base_layers class Wrapper(Layer): @@ -41,6 +42,10 @@ class Wrapper(Layer): def __init__(self, layer, **kwargs): self.layer = layer + # Tracks mapping of Wrapper inputs to inner layer inputs. Useful when + # the inner layer has update ops that depend on its inputs (as opposed + # to the inputs to the Wrapper layer). + self._input_map = {} super(Wrapper, self).__init__(**kwargs) def build(self, input_shape=None): @@ -68,10 +73,17 @@ class Wrapper(Layer): return [] def get_updates_for(self, inputs=None): - if inputs is None: - updates = self.layer.get_updates_for(None) - return updates + super(Wrapper, self).get_updates_for(None) - return super(Wrapper, self).get_updates_for(inputs) + # If the wrapper modifies the inputs, use the modified inputs to + # get the updates from the inner layer. + inner_inputs = inputs + if inputs is not None: + uid = tf_base_layers._object_list_uid(inputs) + if uid in self._input_map: + inner_inputs = self._input_map[uid] + + updates = self.layer.get_updates_for(inner_inputs) + updates += super(Wrapper, self).get_updates_for(inputs) + return updates @property def losses(self): @@ -213,8 +225,11 @@ class TimeDistributed(Wrapper): input_length = input_shape[1] if not input_length: input_length = K.shape(inputs)[1] - # Shape: (num_samples * timesteps, ...) + # Shape: (num_samples * timesteps, ...). And track the + # transformation in self._input_map. + input_uid = tf_base_layers._object_list_uid(inputs) inputs = K.reshape(inputs, (-1,) + input_shape[2:]) + self._input_map[input_uid] = inputs # (num_samples * timesteps, ...) y = self.layer.call(inputs, **kwargs) if hasattr(y, '_uses_learning_phase'): diff --git a/tensorflow/contrib/keras/python/keras/layers/wrappers_test.py b/tensorflow/contrib/keras/python/keras/layers/wrappers_test.py index 531fa76dd8..d8e6c89564 100644 --- a/tensorflow/contrib/keras/python/keras/layers/wrappers_test.py +++ b/tensorflow/contrib/keras/python/keras/layers/wrappers_test.py @@ -101,14 +101,37 @@ class TimeDistributedTest(test.TestCase): self.assertEqual(len(model.losses), 1) def test_TimeDistributed_learning_phase(self): - # test layers that need learning_phase to be set - np.random.seed(1234) - x = keras.layers.Input(shape=(3, 2)) - y = keras.layers.TimeDistributed( - keras.layers.Dropout(.999))(x, training=True) - model = keras.models.Model(x, y) - y = model.predict(np.random.random((10, 3, 2))) - self.assertAllClose(np.mean(y), 0., atol=1e-1, rtol=1e-1) + with self.test_session(): + # test layers that need learning_phase to be set + np.random.seed(1234) + x = keras.layers.Input(shape=(3, 2)) + y = keras.layers.TimeDistributed( + keras.layers.Dropout(.999))(x, training=True) + model = keras.models.Model(x, y) + y = model.predict(np.random.random((10, 3, 2))) + self.assertAllClose(np.mean(y), 0., atol=1e-1, rtol=1e-1) + + def test_TimeDistributed_batchnorm(self): + with self.test_session(): + # test that wrapped BN updates still work. + model = keras.models.Sequential() + model.add(keras.layers.TimeDistributed( + keras.layers.BatchNormalization(center=True, scale=True), + name='bn', + input_shape=(10, 2))) + model.compile(optimizer='rmsprop', loss='mse') + # Assert that mean and variance are 0 and 1. + td = model.layers[0] + self.assertAllClose(td.get_weights()[2], np.array([0, 0])) + assert np.array_equal(td.get_weights()[3], np.array([1, 1])) + # Train + model.train_on_batch(np.random.normal(loc=2, scale=2, size=(1, 10, 2)), + np.broadcast_to(np.array([0, 1]), (1, 10, 2))) + # Assert that mean and variance changed. + assert not np.array_equal(td.get_weights()[2], np.array([0, 0])) + assert not np.array_equal(td.get_weights()[3], np.array([1, 1])) + # Verify input_map has one mapping from inputs to reshaped inputs. + self.assertEqual(len(td._input_map.keys()), 1) class BidirectionalTest(test.TestCase): diff --git a/tensorflow/contrib/keras/python/keras/losses.py b/tensorflow/contrib/keras/python/keras/losses.py index 777ec440ac..e94fca479f 100644 --- a/tensorflow/contrib/keras/python/keras/losses.py +++ b/tensorflow/contrib/keras/python/keras/losses.py @@ -67,15 +67,15 @@ def logcosh(y_true, y_pred): def categorical_crossentropy(y_true, y_pred): - return K.categorical_crossentropy(y_pred, y_true) + return K.categorical_crossentropy(y_true, y_pred) def sparse_categorical_crossentropy(y_true, y_pred): - return K.sparse_categorical_crossentropy(y_pred, y_true) + return K.sparse_categorical_crossentropy(y_true, y_pred) def binary_crossentropy(y_true, y_pred): - return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1) + return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) def kullback_leibler_divergence(y_true, y_pred): diff --git a/tensorflow/contrib/keras/python/keras/models.py b/tensorflow/contrib/keras/python/keras/models.py index 1a0d95c7ff..ff06782a44 100644 --- a/tensorflow/contrib/keras/python/keras/models.py +++ b/tensorflow/contrib/keras/python/keras/models.py @@ -34,6 +34,7 @@ from tensorflow.contrib.keras.python.keras.engine.topology import Input from tensorflow.contrib.keras.python.keras.engine.topology import Layer from tensorflow.contrib.keras.python.keras.engine.topology import TFBaseLayer from tensorflow.contrib.keras.python.keras.engine.training import Model +from tensorflow.contrib.keras.python.keras.utils.generic_utils import has_arg from tensorflow.contrib.keras.python.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.framework import ops from tensorflow.python.platform import tf_logging as logging @@ -121,66 +122,65 @@ def save_model(model, filepath, overwrite=True, include_optimizer=True): if not proceed: return - f = h5py.File(filepath, 'w') - f.attrs['keras_version'] = str(keras_version).encode('utf8') - f.attrs['backend'] = K.backend().encode('utf8') - f.attrs['model_config'] = json.dumps( - { - 'class_name': model.__class__.__name__, - 'config': model.get_config() - }, - default=get_json_type).encode('utf8') - - model_weights_group = f.create_group('model_weights') - model_layers = model.layers - topology.save_weights_to_hdf5_group(model_weights_group, model_layers) - - if include_optimizer and hasattr(model, 'optimizer'): - if isinstance(model.optimizer, optimizers.TFOptimizer): - logging.warning( - 'TensorFlow optimizers do not ' - 'make it possible to access ' - 'optimizer attributes or optimizer state ' - 'after instantiation. ' - 'As a result, we cannot save the optimizer ' - 'as part of the model save file.' - 'You will have to compile your model again after loading it. ' - 'Prefer using a Keras optimizer instead ' - '(see keras.io/optimizers).') - else: - f.attrs['training_config'] = json.dumps( - { - 'optimizer_config': { - 'class_name': model.optimizer.__class__.__name__, - 'config': model.optimizer.get_config() - }, - 'loss': model.loss, - 'metrics': model.metrics, - 'sample_weight_mode': model.sample_weight_mode, - 'loss_weights': model.loss_weights, - }, - default=get_json_type).encode('utf8') - - # Save optimizer weights. - symbolic_weights = getattr(model.optimizer, 'weights') - if symbolic_weights: - optimizer_weights_group = f.create_group('optimizer_weights') - weight_values = K.batch_get_value(symbolic_weights) - weight_names = [] - for w, val in zip(symbolic_weights, weight_values): - name = str(w.name) - weight_names.append(name.encode('utf8')) - optimizer_weights_group.attrs['weight_names'] = weight_names - for name, val in zip(weight_names, weight_values): - param_dset = optimizer_weights_group.create_dataset( - name, val.shape, dtype=val.dtype) - if not val.shape: - # scalar - param_dset[()] = val - else: - param_dset[:] = val - f.flush() - f.close() + with h5py.File(filepath, mode='w') as f: + f.attrs['keras_version'] = str(keras_version).encode('utf8') + f.attrs['backend'] = K.backend().encode('utf8') + f.attrs['model_config'] = json.dumps( + { + 'class_name': model.__class__.__name__, + 'config': model.get_config() + }, + default=get_json_type).encode('utf8') + + model_weights_group = f.create_group('model_weights') + model_layers = model.layers + topology.save_weights_to_hdf5_group(model_weights_group, model_layers) + + if include_optimizer and hasattr(model, 'optimizer'): + if isinstance(model.optimizer, optimizers.TFOptimizer): + logging.warning( + 'TensorFlow optimizers do not ' + 'make it possible to access ' + 'optimizer attributes or optimizer state ' + 'after instantiation. ' + 'As a result, we cannot save the optimizer ' + 'as part of the model save file.' + 'You will have to compile your model again after loading it. ' + 'Prefer using a Keras optimizer instead ' + '(see keras.io/optimizers).') + else: + f.attrs['training_config'] = json.dumps( + { + 'optimizer_config': { + 'class_name': model.optimizer.__class__.__name__, + 'config': model.optimizer.get_config() + }, + 'loss': model.loss, + 'metrics': model.metrics, + 'sample_weight_mode': model.sample_weight_mode, + 'loss_weights': model.loss_weights, + }, + default=get_json_type).encode('utf8') + + # Save optimizer weights. + symbolic_weights = getattr(model.optimizer, 'weights') + if symbolic_weights: + optimizer_weights_group = f.create_group('optimizer_weights') + weight_values = K.batch_get_value(symbolic_weights) + weight_names = [] + for w, val in zip(symbolic_weights, weight_values): + name = str(w.name) + weight_names.append(name.encode('utf8')) + optimizer_weights_group.attrs['weight_names'] = weight_names + for name, val in zip(weight_names, weight_values): + param_dset = optimizer_weights_group.create_dataset( + name, val.shape, dtype=val.dtype) + if not val.shape: + # scalar + param_dset[()] = val + else: + param_dset[:] = val + f.flush() def load_model(filepath, custom_objects=None, compile=True): # pylint: disable=redefined-builtin @@ -545,12 +545,12 @@ class Sequential(Model): Returns: A layer instance. """ - if self.model is None: + if not self.built: self.build() return self.model.get_layer(name, index) def call(self, inputs, mask=None): - if self.model is None: + if not self.built: self.build() return self.model.call(inputs, mask) @@ -586,7 +586,7 @@ class Sequential(Model): @property def uses_learning_phase(self): - if self.model is None: + if not self.built: self.build() return self.model.uses_learning_phase @@ -622,35 +622,35 @@ class Sequential(Model): @property def updates(self): - if self.model is None: + if not self.built: self.build() return self.model.updates @property def state_updates(self): - if self.model is None: + if not self.built: self.build() return self.model.state_updates def get_updates_for(self, inputs): - if self.model is None: + if not self.built: self.build() return self.model.get_updates_for(inputs) @property def losses(self): - if self.model is None: + if not self.built: self.build() return self.model.losses def get_losses_for(self, inputs): - if self.model is None: + if not self.built: self.build() return self.model.get_losses_for(inputs) @property def regularizers(self): - if self.model is None: + if not self.built: self.build() return self.model.regularizers @@ -661,7 +661,7 @@ class Sequential(Model): A flat list of Numpy arrays (one array per model weight). """ - if self.model is None: + if not self.built: self.build() return self.model.get_weights() @@ -673,7 +673,7 @@ class Sequential(Model): of Numpy arrays with shapes and types matching the output of `model.get_weights()`. """ - if self.model is None: + if not self.built: self.build() self.model.set_weights(weights) @@ -710,6 +710,7 @@ class Sequential(Model): loss, metrics=None, sample_weight_mode=None, + weighted_metrics=None, **kwargs): """Configures the learning process. @@ -725,9 +726,9 @@ class Sequential(Model): sample_weight_mode: if you need to do timestep-wise sample weighting (2D weights), set this to "temporal". "None" defaults to sample-wise weights (1D). - **kwargs: for Theano backend, these are passed into K.function. - When using the Tensorflow backend, these are passed into - `tf.Session.run`. + weighted_metrics: list of metrics to be evaluated and weighted + by `sample_weight` or `class_weight` during training and testing. + **kwargs: These are passed into `tf.Session.run`. Example: ```python @@ -747,12 +748,14 @@ class Sequential(Model): loss, metrics=metrics, sample_weight_mode=sample_weight_mode, + weighted_metrics=weighted_metrics, **kwargs) self.optimizer = self.model.optimizer self.loss = self.model.loss self.total_loss = self.model.total_loss self.loss_weights = self.model.loss_weights self.metrics = self.model.metrics + self.weighted_metrics = self.model.weighted_metrics self.metrics_tensors = self.model.metrics_tensors self.metrics_names = self.model.metrics_names self.sample_weight_mode = self.model.sample_weight_mode @@ -818,7 +821,7 @@ class Sequential(Model): Raises: RuntimeError: if the model was never compiled. """ - if self.model is None: + if not self.built: raise RuntimeError('The model needs to be compiled ' 'before being used.') return self.model.fit( x, @@ -854,7 +857,7 @@ class Sequential(Model): Raises: RuntimeError: if the model was never compiled. """ - if self.model is None: + if not self.built: raise RuntimeError('The model needs to be compiled ' 'before being used.') return self.model.evaluate( x, @@ -876,7 +879,7 @@ class Sequential(Model): Returns: A Numpy array of predictions. """ - if self.model is None: + if not self.built: self.build() return self.model.predict(x, batch_size=batch_size, verbose=verbose) @@ -890,7 +893,7 @@ class Sequential(Model): Returns: A Numpy array of predictions. """ - if self.model is None: + if not self.built: self.build() return self.model.predict_on_batch(x) @@ -914,7 +917,7 @@ class Sequential(Model): Raises: RuntimeError: if the model was never compiled. """ - if self.model is None: + if not self.built: raise RuntimeError('The model needs to be compiled ' 'before being used.') return self.model.train_on_batch( x, y, sample_weight=sample_weight, class_weight=class_weight) @@ -937,7 +940,7 @@ class Sequential(Model): Raises: RuntimeError: if the model was never compiled. """ - if self.model is None: + if not self.built: raise RuntimeError('The model needs to be compiled ' 'before being used.') return self.model.test_on_batch(x, y, sample_weight=sample_weight) @@ -1083,7 +1086,7 @@ class Sequential(Model): if kwargs: raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) - if self.model is None: + if not self.built: raise RuntimeError('The model needs to be compiled ' 'before being used.') return self.model.fit_generator( generator, @@ -1149,7 +1152,7 @@ class Sequential(Model): if kwargs: raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) - if self.model is None: + if not self.built: raise RuntimeError('The model needs to be compiled ' 'before being used.') return self.model.evaluate_generator( generator, @@ -1205,7 +1208,7 @@ class Sequential(Model): if kwargs: raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) - if self.model is None: + if not self.built: self.build() return self.model.predict_generator( generator, @@ -1231,3 +1234,225 @@ class Sequential(Model): layer = layer_module.deserialize(conf, custom_objects=custom_objects) model.add(layer) return model + + +def _clone_functional_model(model, input_tensors=None): + """Clone a functional `Model` instance. + + Model cloning is similar to calling a model on new inputs, + except that it creates new layers (and thus new weights) instead + of sharing the weights of the existing layers. + + Arguments: + model: Instance of `Model`. + input_tensors: optional list of input tensors + to build the model upon. If not provided, + placeholders will be created. + + Returns: + An instance of `Model` reproducing the behavior + of the original model, on top of new inputs tensors, + using newly instantiated weights. + + Raises: + ValueError: in case of invalid `model` argument value. + """ + if not isinstance(model, Model): + raise ValueError('Expected `model` argument ' + 'to be a `Model` instance, got ', model) + if isinstance(model, Sequential): + raise ValueError('Expected `model` argument ' + 'to be a functional `Model` instance, ' + 'got a `Sequential` instance instead:', model) + + layer_map = {} # Cache for created layers. + tensor_map = {} # Map {reference_tensor: (corresponding_tensor, mask)} + if input_tensors is None: + # Create placeholders to build the model on top of. + input_layers = [] + input_tensors = [] + for layer in model._input_layers: + input_tensor = Input( + batch_shape=layer.batch_input_shape, + dtype=layer.dtype, + sparse=layer.sparse, + name=layer.name) + input_tensors.append(input_tensor) + # Cache newly created input layer. + newly_created_input_layer = input_tensor._keras_history[0] + layer_map[layer] = newly_created_input_layer + for original_input_layer, cloned_input_layer in zip(model._input_layers, + input_layers): + layer_map[original_input_layer] = cloned_input_layer + else: + # Make sure that all input tensors come from a Keras layer. + # If tensor comes from an input layer: cache the input layer. + input_tensors = topology._to_list(input_tensors) + input_tensors_ = [] + for i, x in enumerate(input_tensors): + if not K.is_keras_tensor(x): + name = model._input_layers[i].name + input_tensor = Input(tensor=x, name='input_wrapper_for_' + name) + input_tensors_.append(input_tensor) + # Cache newly created input layer. + original_input_layer = x._keras_history[0] + newly_created_input_layer = input_tensor._keras_history[0] + layer_map[original_input_layer] = newly_created_input_layer + else: + input_tensors_.append(x) + input_tensors = input_tensors_ + + for x, y in zip(model.inputs, input_tensors): + tensor_map[x] = (y, None) # tensor, mask + + # Iterated over every node in the reference model, in depth order. + depth_keys = list(model._nodes_by_depth.keys()) + depth_keys.sort(reverse=True) + for depth in depth_keys: + nodes = model._nodes_by_depth[depth] + for node in nodes: + # Recover the corresponding layer. + layer = node.outbound_layer + + # Get or create layer. + if layer not in layer_map: + # Clone layer. + new_layer = layer.__class__.from_config(layer.get_config()) + layer_map[layer] = new_layer + layer = new_layer + else: + # Reuse previously cloned layer. + layer = layer_map[layer] + # Don't call InputLayer multiple times. + if isinstance(layer, topology.InputLayer): + continue + + # Gather inputs to call the new layer. + referenceinput_tensors_ = node.input_tensors + reference_output_tensors = node.output_tensors + + # If all previous input tensors are available in tensor_map, + # then call node.inbound_layer on them. + computed_data = [] # List of tuples (input, mask). + for x in referenceinput_tensors_: + if x in tensor_map: + computed_data.append(tensor_map[x]) + + if len(computed_data) == len(referenceinput_tensors_): + # Call layer. + if node.arguments: + kwargs = node.arguments + else: + kwargs = {} + if len(computed_data) == 1: + computed_tensor, computed_mask = computed_data[0] + if has_arg(layer.call, 'mask'): + if 'mask' not in kwargs: + kwargs['mask'] = computed_mask + output_tensors = topology._to_list(layer(computed_tensor, **kwargs)) + output_masks = topology._to_list( + layer.compute_mask(computed_tensor, computed_mask)) + computed_tensors = [computed_tensor] + computed_masks = [computed_mask] + else: + computed_tensors = [x[0] for x in computed_data] + computed_masks = [x[1] for x in computed_data] + if has_arg(layer.call, 'mask'): + if 'mask' not in kwargs: + kwargs['mask'] = computed_masks + output_tensors = topology._to_list(layer(computed_tensors, **kwargs)) + output_masks = topology._to_list( + layer.compute_mask(computed_tensors, computed_masks)) + # Update tensor_map. + for x, y, mask in zip(reference_output_tensors, output_tensors, + output_masks): + tensor_map[x] = (y, mask) + + # Check that we did compute the model outputs, + # then instantiate a new model from inputs and outputs. + output_tensors = [] + for x in model.outputs: + assert x in tensor_map, 'Could not compute output ' + str(x) + tensor, _ = tensor_map[x] + output_tensors.append(tensor) + return Model(input_tensors, output_tensors, name=model.name) + + +def _clone_sequential_model(model, input_tensors=None): + """Clone a `Sequential` model instance. + + Model cloning is similar to calling a model on new inputs, + except that it creates new layers (and thus new weights) instead + of sharing the weights of the existing layers. + + Arguments: + model: Instance of `Sequential`. + input_tensors: optional list of input tensors + to build the model upon. If not provided, + placeholders will be created. + + Returns: + An instance of `Sequential` reproducing the behavior + of the original model, on top of new inputs tensors, + using newly instantiated weights. + + Raises: + ValueError: in case of invalid `model` argument value. + """ + if not isinstance(model, Sequential): + raise ValueError('Expected `model` argument ' + 'to be a `Sequential` model instance, ' + 'but got:', model) + + def clone(layer): + return layer.__class__.from_config(layer.get_config()) + + layers = [clone(layer) for layer in model.layers] + if input_tensors is None: + return Sequential(layers=layers, name=model.name) + else: + if len(topology._to_list(input_tensors)) != 1: + raise ValueError('To clone a `Sequential` model, we expect ' + ' at most one tensor ' + 'as part of `input_tensors`.') + x = topology._to_list(input_tensors)[0] + if K.is_keras_tensor(x): + origin_layer = x._keras_history[0] + if isinstance(origin_layer, topology.InputLayer): + return Sequential(layers=[origin_layer] + layers, name=model.name) + else: + raise ValueError('Cannot clone a `Sequential` model on top ' + 'of a tensor that comes from a Keras layer ' + 'other than an `InputLayer`. ' + 'Use the functional API instead.') + input_tensor = Input(tensor=x, name='input_wrapper_for_' + str(x.name)) + input_layer = input_tensor._keras_history[0] + return Sequential(layers=[input_layer] + layers, name=model.name) + + +def clone_model(model, input_tensors=None): + """Clone any `Model` instance. + + Model cloning is similar to calling a model on new inputs, + except that it creates new layers (and thus new weights) instead + of sharing the weights of the existing layers. + + Arguments: + model: Instance of `Model` + (could be a functional model or a Sequential model). + input_tensors: optional list of input tensors + to build the model upon. If not provided, + placeholders will be created. + + Returns: + An instance of `Model` reproducing the behavior + of the original model, on top of new inputs tensors, + using newly instantiated weights. + + Raises: + ValueError: in case of invalid `model` argument value. + """ + if isinstance(model, Sequential): + return _clone_sequential_model(model, input_tensors=input_tensors) + else: + return _clone_functional_model(model, input_tensors=input_tensors) diff --git a/tensorflow/contrib/keras/python/keras/models_test.py b/tensorflow/contrib/keras/python/keras/models_test.py index 44088a1b32..1a4e6fb4c8 100644 --- a/tensorflow/contrib/keras/python/keras/models_test.py +++ b/tensorflow/contrib/keras/python/keras/models_test.py @@ -316,5 +316,104 @@ class TestSequential(test.TestCase): model.build() +class TestModelCloning(test.TestCase): + + def test_clone_sequential_model(self): + with self.test_session(): + val_a = np.random.random((10, 4)) + val_out = np.random.random((10, 4)) + + model = keras.models.Sequential() + model.add(keras.layers.Dense(4, input_shape=(4,))) + model.add(keras.layers.Dropout(0.5)) + model.add(keras.layers.Dense(4)) + + # Everything should work in a new session. + keras.backend.clear_session() + + with self.test_session(): + # With placeholder creation + new_model = keras.models.clone_model(model) + new_model.compile('rmsprop', 'mse') + new_model.train_on_batch(val_a, val_out) + + # On top of new tensor + input_a = keras.Input(shape=(4,)) + new_model = keras.models.clone_model( + model, input_tensors=input_a) + new_model.compile('rmsprop', 'mse') + new_model.train_on_batch(val_a, val_out) + + # On top of new, non-Keras tensor + input_a = keras.backend.variable(val_a) + new_model = keras.models.clone_model( + model, input_tensors=input_a) + new_model.compile('rmsprop', 'mse') + new_model.train_on_batch(None, val_out) + + def test_clone_functional_model(self): + with self.test_session(): + val_a = np.random.random((10, 4)) + val_b = np.random.random((10, 4)) + val_out = np.random.random((10, 4)) + + input_a = keras.Input(shape=(4,)) + input_b = keras.Input(shape=(4,)) + dense_1 = keras.layers.Dense(4,) + dense_2 = keras.layers.Dense(4,) + + x_a = dense_1(input_a) + x_a = keras.layers.Dropout(0.5)(x_a) + x_b = dense_1(input_b) + x_a = dense_2(x_a) + outputs = keras.layers.add([x_a, x_b]) + model = keras.models.Model([input_a, input_b], outputs) + + # Everything should work in a new session. + keras.backend.clear_session() + + with self.test_session(): + # With placeholder creation + new_model = keras.models.clone_model(model) + new_model.compile('rmsprop', 'mse') + new_model.train_on_batch([val_a, val_b], val_out) + + # On top of new tensors + input_a = keras.Input(shape=(4,), name='a') + input_b = keras.Input(shape=(4,), name='b') + new_model = keras.models.clone_model( + model, input_tensors=[input_a, input_b]) + new_model.compile('rmsprop', 'mse') + new_model.train_on_batch([val_a, val_b], val_out) + + # On top of new, non-Keras tensors + input_a = keras.backend.variable(val_a) + input_b = keras.backend.variable(val_b) + new_model = keras.models.clone_model( + model, input_tensors=[input_a, input_b]) + new_model.compile('rmsprop', 'mse') + new_model.train_on_batch(None, val_out) + + def test_model_cloning_invalid_use_cases(self): + seq_model = keras.models.Sequential() + seq_model.add(keras.layers.Dense(4, input_shape=(4,))) + + x = keras.Input((4,)) + y = keras.layers.Dense(4)(x) + fn_model = keras.models.Model(x, y) + + with self.assertRaises(ValueError): + keras.models._clone_functional_model(seq_model) + with self.assertRaises(ValueError): + keras.models._clone_functional_model(None) + with self.assertRaises(ValueError): + keras.models._clone_sequential_model(fn_model) + + with self.assertRaises(ValueError): + keras.models._clone_sequential_model(seq_model, input_tensors=[x, x]) + with self.assertRaises(ValueError): + keras.models._clone_sequential_model(seq_model, input_tensors=y) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/keras/python/keras/optimizers.py b/tensorflow/contrib/keras/python/keras/optimizers.py index a1bd3be026..f137563d6d 100644 --- a/tensorflow/contrib/keras/python/keras/optimizers.py +++ b/tensorflow/contrib/keras/python/keras/optimizers.py @@ -88,7 +88,7 @@ class Optimizer(object): self.updates = [] self.weights = [] - def get_updates(self, params, constraints, loss): + def get_updates(self, loss, params): raise NotImplementedError def get_gradients(self, loss, params): @@ -163,21 +163,22 @@ class SGD(Optimizer): def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, **kwargs): super(SGD, self).__init__(**kwargs) - self.iterations = K.variable(0., name='iterations') - self.lr = K.variable(lr, name='lr') - self.momentum = K.variable(momentum, name='momentum') - self.decay = K.variable(decay, name='decay') + with K.name_scope(self.__class__.__name__): + self.iterations = K.variable(0, dtype='int64', name='iterations') + self.lr = K.variable(lr, name='lr') + self.momentum = K.variable(momentum, name='momentum') + self.decay = K.variable(decay, name='decay') self.initial_decay = decay self.nesterov = nesterov - def get_updates(self, params, constraints, loss): + def get_updates(self, loss, params): grads = self.get_gradients(loss, params) - self.updates = [] + self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr *= (1. / (1. + self.decay * self.iterations)) - self.updates.append(K.update_add(self.iterations, 1)) + lr *= (1. / (1. + self.decay * K.cast(self.iterations, + K.dtype(self.decay)))) # momentum shapes = [K.int_shape(p) for p in params] @@ -192,10 +193,9 @@ class SGD(Optimizer): else: new_p = p + v - # apply constraints - if p in constraints: - c = constraints[p] - new_p = c(new_p) + # Apply constraints. + if getattr(p, 'constraint', None) is not None: + new_p = p.constraint(new_p) self.updates.append(K.update(p, new_p)) return self.updates @@ -212,7 +212,6 @@ class SGD(Optimizer): class RMSprop(Optimizer): - # pylint: disable=line-too-long """RMSProp optimizer. It is recommended to leave the parameters of this optimizer @@ -227,34 +226,30 @@ class RMSprop(Optimizer): rho: float >= 0. epsilon: float >= 0. Fuzz factor. decay: float >= 0. Learning rate decay over each update. - - References: - - [rmsprop: Divide the gradient by a running average of its recent - magnitude](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) """ - # pylint: enable=line-too-long - def __init__(self, lr=0.001, rho=0.9, epsilon=1e-8, decay=0., **kwargs): super(RMSprop, self).__init__(**kwargs) - self.lr = K.variable(lr, name='lr') - self.rho = K.variable(rho, name='rho') + with K.name_scope(self.__class__.__name__): + self.lr = K.variable(lr, name='lr') + self.rho = K.variable(rho, name='rho') + self.decay = K.variable(decay, name='decay') + self.iterations = K.variable(0, dtype='int64', name='iterations') self.epsilon = epsilon - self.decay = K.variable(decay, name='decay') self.initial_decay = decay - self.iterations = K.variable(0., name='iterations') - def get_updates(self, params, constraints, loss): + def get_updates(self, loss, params): grads = self.get_gradients(loss, params) - shapes = [K.int_shape(p) for p in params] - accumulators = [K.zeros(shape) for shape in shapes] + accumulators = [ + K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params + ] self.weights = accumulators - self.updates = [] + self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr *= (1. / (1. + self.decay * self.iterations)) - self.updates.append(K.update_add(self.iterations, 1)) + lr *= (1. / (1. + self.decay * K.cast(self.iterations, + K.dtype(self.decay)))) for p, g, a in zip(params, grads, accumulators): # update accumulator @@ -262,10 +257,10 @@ class RMSprop(Optimizer): self.updates.append(K.update(a, new_a)) new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon) - # apply constraints - if p in constraints: - c = constraints[p] - new_p = c(new_p) + # Apply constraints. + if getattr(p, 'constraint', None) is not None: + new_p = p.constraint(new_p) + self.updates.append(K.update(p, new_p)) return self.updates @@ -281,7 +276,6 @@ class RMSprop(Optimizer): class Adagrad(Optimizer): - # pylint: disable=line-too-long """Adagrad optimizer. It is recommended to leave the parameters of this optimizer @@ -297,36 +291,36 @@ class Adagrad(Optimizer): Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) """ - # pylint: enable=line-too-long - def __init__(self, lr=0.01, epsilon=1e-8, decay=0., **kwargs): super(Adagrad, self).__init__(**kwargs) - self.lr = K.variable(lr, name='lr') + with K.name_scope(self.__class__.__name__): + self.lr = K.variable(lr, name='lr') + self.decay = K.variable(decay, name='decay') + self.iterations = K.variable(0, dtype='int64', name='iterations') self.epsilon = epsilon - self.decay = K.variable(decay, name='decay') self.initial_decay = decay - self.iterations = K.variable(0., name='iterations') - def get_updates(self, params, constraints, loss): + def get_updates(self, loss, params): grads = self.get_gradients(loss, params) shapes = [K.int_shape(p) for p in params] accumulators = [K.zeros(shape) for shape in shapes] self.weights = accumulators - self.updates = [] + self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr *= (1. / (1. + self.decay * self.iterations)) - self.updates.append(K.update_add(self.iterations, 1)) + lr *= (1. / (1. + self.decay * K.cast(self.iterations, + K.dtype(self.decay)))) for p, g, a in zip(params, grads, accumulators): new_a = a + K.square(g) # update accumulator self.updates.append(K.update(a, new_a)) new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon) - # apply constraints - if p in constraints: - c = constraints[p] - new_p = c(new_p) + + # Apply constraints. + if getattr(p, 'constraint', None) is not None: + new_p = p.constraint(new_p) + self.updates.append(K.update(p, new_p)) return self.updates @@ -341,7 +335,6 @@ class Adagrad(Optimizer): class Adadelta(Optimizer): - # pylint: disable=line-too-long """Adadelta optimizer. It is recommended to leave the parameters of this optimizer @@ -359,29 +352,28 @@ class Adadelta(Optimizer): method](http://arxiv.org/abs/1212.5701) """ - # pylint: enable=line-too-long - def __init__(self, lr=1.0, rho=0.95, epsilon=1e-8, decay=0., **kwargs): super(Adadelta, self).__init__(**kwargs) - self.lr = K.variable(lr, name='lr') + with K.name_scope(self.__class__.__name__): + self.lr = K.variable(lr, name='lr') + self.decay = K.variable(decay, name='decay') + self.iterations = K.variable(0, dtype='int64', name='iterations') self.rho = rho self.epsilon = epsilon - self.decay = K.variable(decay, name='decay') self.initial_decay = decay - self.iterations = K.variable(0., name='iterations') - def get_updates(self, params, constraints, loss): + def get_updates(self, loss, params): grads = self.get_gradients(loss, params) shapes = [K.int_shape(p) for p in params] accumulators = [K.zeros(shape) for shape in shapes] delta_accumulators = [K.zeros(shape) for shape in shapes] self.weights = accumulators + delta_accumulators - self.updates = [] + self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr *= (1. / (1. + self.decay * self.iterations)) - self.updates.append(K.update_add(self.iterations, 1)) + lr *= (1. / (1. + self.decay * K.cast(self.iterations, + K.dtype(self.decay)))) for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators): # update accumulator @@ -390,12 +382,12 @@ class Adadelta(Optimizer): # use the new accumulator and the *old* delta_accumulator update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon) - new_p = p - lr * update - # apply constraints - if p in constraints: - c = constraints[p] - new_p = c(new_p) + + # Apply constraints. + if getattr(p, 'constraint', None) is not None: + new_p = p.constraint(new_p) + self.updates.append(K.update(p, new_p)) # update delta_accumulator @@ -415,7 +407,6 @@ class Adadelta(Optimizer): class Adam(Optimizer): - # pylint: disable=line-too-long """Adam optimizer. Default parameters follow those provided in the original paper. @@ -432,8 +423,6 @@ class Adam(Optimizer): Optimization](http://arxiv.org/abs/1412.6980v8) """ - # pylint: enable=line-too-long - def __init__(self, lr=0.001, beta_1=0.9, @@ -442,29 +431,30 @@ class Adam(Optimizer): decay=0., **kwargs): super(Adam, self).__init__(**kwargs) - self.iterations = K.variable(0, name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') + with K.name_scope(self.__class__.__name__): + self.iterations = K.variable(0, dtype='int64', name='iterations') + self.lr = K.variable(lr, name='lr') + self.beta_1 = K.variable(beta_1, name='beta_1') + self.beta_2 = K.variable(beta_2, name='beta_2') + self.decay = K.variable(decay, name='decay') self.epsilon = epsilon - self.decay = K.variable(decay, name='decay') self.initial_decay = decay - def get_updates(self, params, constraints, loss): + def get_updates(self, loss, params): grads = self.get_gradients(loss, params) self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr *= (1. / (1. + self.decay * self.iterations)) + lr *= (1. / (1. + self.decay * K.cast(self.iterations, + K.dtype(self.decay)))) - t = self.iterations + 1 + t = K.cast(self.iterations, K.floatx()) + 1 lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))) - shapes = [K.int_shape(p) for p in params] - ms = [K.zeros(shape) for shape in shapes] - vs = [K.zeros(shape) for shape in shapes] + ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] + vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] self.weights = [self.iterations] + ms + vs for p, g, m, v in zip(params, grads, ms, vs): @@ -474,12 +464,12 @@ class Adam(Optimizer): self.updates.append(K.update(m, m_t)) self.updates.append(K.update(v, v_t)) - new_p = p_t - # apply constraints - if p in constraints: - c = constraints[p] - new_p = c(new_p) + + # Apply constraints. + if getattr(p, 'constraint', None) is not None: + new_p = p.constraint(new_p) + self.updates.append(K.update(p, new_p)) return self.updates @@ -496,7 +486,6 @@ class Adam(Optimizer): class Adamax(Optimizer): - # pylint: disable=line-too-long """Adamax optimizer from Adam paper's Section 7. It is a variant of Adam based on the infinity norm. @@ -513,8 +502,6 @@ class Adamax(Optimizer): Optimization](http://arxiv.org/abs/1412.6980v8) """ - # pylint: enable=line-too-long - def __init__(self, lr=0.002, beta_1=0.9, @@ -523,23 +510,25 @@ class Adamax(Optimizer): decay=0., **kwargs): super(Adamax, self).__init__(**kwargs) - self.iterations = K.variable(0., name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') + with K.name_scope(self.__class__.__name__): + self.iterations = K.variable(0, dtype='int64', name='iterations') + self.lr = K.variable(lr, name='lr') + self.beta_1 = K.variable(beta_1, name='beta_1') + self.beta_2 = K.variable(beta_2, name='beta_2') + self.decay = K.variable(decay, name='decay') self.epsilon = epsilon - self.decay = K.variable(decay, name='decay') self.initial_decay = decay - def get_updates(self, params, constraints, loss): + def get_updates(self, loss, params): grads = self.get_gradients(loss, params) self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: - lr *= (1. / (1. + self.decay * self.iterations)) + lr *= (1. / (1. + self.decay * K.cast(self.iterations, + K.dtype(self.decay)))) - t = self.iterations + 1 + t = K.cast(self.iterations, K.floatx()) + 1 lr_t = lr / (1. - K.pow(self.beta_1, t)) shapes = [K.int_shape(p) for p in params] @@ -557,12 +546,12 @@ class Adamax(Optimizer): self.updates.append(K.update(m, m_t)) self.updates.append(K.update(u, u_t)) - new_p = p_t - # apply constraints - if p in constraints: - c = constraints[p] - new_p = c(new_p) + + # Apply constraints. + if getattr(p, 'constraint', None) is not None: + new_p = p.constraint(new_p) + self.updates.append(K.update(p, new_p)) return self.updates @@ -579,7 +568,6 @@ class Adamax(Optimizer): class Nadam(Optimizer): - # pylint: disable=line-too-long """Nesterov Adam optimizer. Much like Adam is essentially RMSprop with momentum, @@ -600,8 +588,6 @@ class Nadam(Optimizer): learning](http://www.cs.toronto.edu/~fritz/absps/momentum.pdf) """ - # pylint: enable=line-too-long - def __init__(self, lr=0.002, beta_1=0.9, @@ -610,26 +596,26 @@ class Nadam(Optimizer): schedule_decay=0.004, **kwargs): super(Nadam, self).__init__(**kwargs) - self.iterations = K.variable(0., name='iterations') - self.m_schedule = K.variable(1., name='m_schedule') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') + with K.name_scope(self.__class__.__name__): + self.iterations = K.variable(0, dtype='int64', name='iterations') + self.m_schedule = K.variable(1., name='m_schedule') + self.lr = K.variable(lr, name='lr') + self.beta_1 = K.variable(beta_1, name='beta_1') + self.beta_2 = K.variable(beta_2, name='beta_2') self.epsilon = epsilon self.schedule_decay = schedule_decay - def get_updates(self, params, constraints, loss): + def get_updates(self, loss, params): grads = self.get_gradients(loss, params) self.updates = [K.update_add(self.iterations, 1)] - - t = self.iterations + 1 + t = K.cast(self.iterations, K.floatx()) + 1 # Due to the recommendations in [2], i.e. warming momentum schedule - momentum_cache_t = self.beta_1 * (1. - 0.5 * - (K.pow(0.96, t * self.schedule_decay))) - momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * - (K.pow(0.96, - (t + 1) * self.schedule_decay))) + momentum_cache_t = self.beta_1 * ( + 1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) + momentum_cache_t_1 = self.beta_1 * ( + 1. - 0.5 * + (K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) m_schedule_new = self.m_schedule * momentum_cache_t m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 self.updates.append((self.m_schedule, m_schedule_new)) @@ -656,10 +642,10 @@ class Nadam(Optimizer): p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon) new_p = p_t - # apply constraints - if p in constraints: - c = constraints[p] - new_p = c(new_p) + # Apply constraints. + if getattr(p, 'constraint', None) is not None: + new_p = p.constraint(new_p) + self.updates.append(K.update(p, new_p)) return self.updates @@ -681,16 +667,12 @@ class TFOptimizer(Optimizer): def __init__(self, optimizer): # pylint: disable=super-init-not-called self.optimizer = optimizer - self.iterations = K.variable(0., name='iterations') - self.updates = [] + with K.name_scope(self.__class__.__name__): + self.iterations = K.variable(0, dtype='int64', name='iterations') - def get_updates(self, params, constraints, loss): - if constraints: - raise ValueError('TF optimizers do not support ' - 'weights constraints. Either remove ' - 'all weights constraints in your model, ' - 'or use a Keras optimizer.') + def get_updates(self, loss, params): grads = self.optimizer.compute_gradients(loss, params) + self.updates = [K.update_add(self.iterations, 1)] opt_update = self.optimizer.apply_gradients( grads, global_step=self.iterations) self.updates.append(opt_update) diff --git a/tensorflow/contrib/keras/python/keras/optimizers_test.py b/tensorflow/contrib/keras/python/keras/optimizers_test.py index bb598f3037..a105d13cf9 100644 --- a/tensorflow/contrib/keras/python/keras/optimizers_test.py +++ b/tensorflow/contrib/keras/python/keras/optimizers_test.py @@ -54,6 +54,23 @@ def _test_optimizer(optimizer, target=0.75): new_config['class_name'] = new_config['class_name'].lower() assert config == new_config + # Test constraints. + model = keras.models.Sequential() + dense = keras.layers.Dense(10, + input_shape=(x_train.shape[1],), + kernel_constraint=lambda x: 0. * x + 1., + bias_constraint=lambda x: 0. * x + 2., + activation='relu') + model.add(dense) + model.add(keras.layers.Dense(y_train.shape[1], activation='softmax')) + model.compile(loss='categorical_crossentropy', + optimizer=optimizer, + metrics=['accuracy']) + model.train_on_batch(x_train[:10], y_train[:10]) + kernel, bias = dense.get_weights() + np.testing.assert_allclose(kernel, 1., atol=1e-3) + np.testing.assert_allclose(bias, 2., atol=1e-3) + class KerasOptimizersTest(test.TestCase): @@ -105,19 +122,17 @@ class KerasOptimizersTest(test.TestCase): clipvalue=0.5)) def test_tfoptimizer(self): - optimizer = keras.optimizers.TFOptimizer(AdamOptimizer) + optimizer = keras.optimizers.TFOptimizer(AdamOptimizer(0.01)) model = keras.models.Sequential() model.add(keras.layers.Dense( 2, input_shape=(3,), kernel_constraint=keras.constraints.MaxNorm(1))) # This is possible model.compile(loss='mean_squared_error', optimizer=optimizer) - # TF optimizers do not support weights constraints - with self.assertRaises(ValueError): - model.fit(np.random.random((5, 3)), - np.random.random((5, 2)), - epochs=1, - batch_size=5, - verbose=0) + model.fit(np.random.random((5, 3)), + np.random.random((5, 2)), + epochs=1, + batch_size=5, + verbose=0) # not supported with self.assertRaises(NotImplementedError): _ = optimizer.weights diff --git a/tensorflow/contrib/keras/python/keras/preprocessing/image.py b/tensorflow/contrib/keras/python/keras/preprocessing/image.py index 4f2cff804e..4d6e0e0fcb 100644 --- a/tensorflow/contrib/keras/python/keras/preprocessing/image.py +++ b/tensorflow/contrib/keras/python/keras/preprocessing/image.py @@ -583,9 +583,9 @@ class ImageDataGenerator(object): 'first by calling `.fit(numpy_data)`.') if self.zca_whitening: if self.principal_components is not None: - flatx = np.reshape(x, (x.size)) + flatx = np.reshape(x, (-1, np.prod(x.shape[-3:]))) whitex = np.dot(flatx, self.principal_components) - x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2])) + x = np.reshape(whitex, x.shape) else: logging.warning('This ImageDataGenerator specifies ' '`zca_whitening`, but it hasn\'t' @@ -864,7 +864,7 @@ class NumpyArrayIterator(Iterator): 'with shape', self.x.shape) channels_axis = 3 if data_format == 'channels_last' else 1 if self.x.shape[channels_axis] not in {1, 3, 4}: - raise ValueError( + logging.warning( 'NumpyArrayIterator is set to use the ' 'data format convention "' + data_format + '" ' '(channels on axis ' + str(channels_axis) + '), i.e. expected ' @@ -1076,7 +1076,7 @@ class DirectoryIterator(Iterator): self.save_prefix = save_prefix self.save_format = save_format - white_list_formats = {'png', 'jpg', 'jpeg', 'bmp'} + white_list_formats = {'png', 'jpg', 'jpeg', 'bmp', 'ppm'} # first, count the number of samples and classes self.samples = 0 diff --git a/tensorflow/contrib/keras/python/keras/preprocessing/image_test.py b/tensorflow/contrib/keras/python/keras/preprocessing/image_test.py index bb09ed1ae8..d9ecb19003 100644 --- a/tensorflow/contrib/keras/python/keras/preprocessing/image_test.py +++ b/tensorflow/contrib/keras/python/keras/preprocessing/image_test.py @@ -78,6 +78,11 @@ class TestImage(test.TestCase): cval=0.5, horizontal_flip=True, vertical_flip=True) + # Basic test before fit + x = np.random.random((32, 10, 10, 3)) + generator.flow(x) + + # Fit generator.fit(images, augment=True) for x, _ in generator.flow( @@ -95,14 +100,17 @@ class TestImage(test.TestCase): samplewise_std_normalization=True, zca_whitening=True, data_format='channels_last') + # Test fit with invalid data with self.assertRaises(ValueError): x = np.random.random((3, 10, 10)) generator.fit(x) # Test flow with invalid data with self.assertRaises(ValueError): - x = np.random.random((32, 10, 10)) - generator.flow(np.arange(x.shape[0])) + generator.flow(np.arange(5)) + # Invalid number of channels: will work but raise a warning + x = np.random.random((32, 10, 10, 5)) + generator.flow(x) with self.assertRaises(ValueError): generator = keras.preprocessing.image.ImageDataGenerator( diff --git a/tensorflow/contrib/keras/python/keras/preprocessing/sequence.py b/tensorflow/contrib/keras/python/keras/preprocessing/sequence.py index 382aa386d4..a5deec87af 100644 --- a/tensorflow/contrib/keras/python/keras/preprocessing/sequence.py +++ b/tensorflow/contrib/keras/python/keras/preprocessing/sequence.py @@ -143,7 +143,8 @@ def skipgrams(sequence, negative_samples=1., shuffle=True, categorical=False, - sampling_table=None): + sampling_table=None, + seed=None): """Generates skipgram word pairs. Takes a sequence (list of indexes of words), @@ -169,6 +170,7 @@ def skipgrams(sequence, if True labels will be categorical eg. [[1,0],[0,1],[0,1] .. ] sampling_table: 1D array of size `vocabulary_size` where the entry i encodes the probabibily to sample a word of rank i. + seed: Random seed. Returns: couples, labels: where `couples` are int pairs and @@ -214,7 +216,8 @@ def skipgrams(sequence, labels += [0] * num_negative_samples if shuffle: - seed = random.randint(0, 10e6) + if seed is None: + seed = random.randint(0, 10e6) random.seed(seed) random.shuffle(couples) random.seed(seed) diff --git a/tensorflow/contrib/keras/python/keras/preprocessing/text.py b/tensorflow/contrib/keras/python/keras/preprocessing/text.py index ed00eef6ad..47e5aa064f 100644 --- a/tensorflow/contrib/keras/python/keras/preprocessing/text.py +++ b/tensorflow/contrib/keras/python/keras/preprocessing/text.py @@ -52,7 +52,13 @@ def text_to_word_sequence(text, """ if lower: text = text.lower() - text = text.translate(maketrans(filters, split * len(filters))) + + if sys.version_info < (3,) and isinstance(text, unicode): + translate_map = dict((ord(c), unicode(split)) for c in filters) + else: + translate_map = maketrans(filters, split * len(filters)) + + text = text.translate(translate_map) seq = text.split(split) return [i for i in seq if i] diff --git a/tensorflow/contrib/keras/python/keras/preprocessing/text_test.py b/tensorflow/contrib/keras/python/keras/preprocessing/text_test.py index 7b26219e61..7deeff0873 100644 --- a/tensorflow/contrib/keras/python/keras/preprocessing/text_test.py +++ b/tensorflow/contrib/keras/python/keras/preprocessing/text_test.py @@ -33,6 +33,13 @@ class TestText(test.TestCase): self.assertLessEqual(np.max(encoded), 4) self.assertGreaterEqual(np.min(encoded), 0) + # Test on unicode. + text = u'The cat sat on the mat.' + encoded = keras.preprocessing.text.one_hot(text, 5) + self.assertEqual(len(encoded), 6) + self.assertLessEqual(np.max(encoded), 4) + self.assertGreaterEqual(np.min(encoded), 0) + def test_tokenizer(self): texts = [ 'The cat sat on the mat.', diff --git a/tensorflow/contrib/keras/python/keras/utils/data_utils.py b/tensorflow/contrib/keras/python/keras/utils/data_utils.py index 853625e7c4..08ab8d7204 100644 --- a/tensorflow/contrib/keras/python/keras/utils/data_utils.py +++ b/tensorflow/contrib/keras/python/keras/utils/data_utils.py @@ -368,6 +368,12 @@ class Sequence(object): """ raise NotImplementedError + @abstractmethod + def on_epoch_end(self): + """Method called at the end of every epoch. + """ + raise NotImplementedError + def get_index(ds, i): """Quick fix for Python2, otherwise, it cannot be pickled. @@ -453,15 +459,16 @@ class OrderedEnqueuer(SequenceEnqueuer): use_multiprocessing: use multiprocessing if True, otherwise threading scheduling: Sequential querying of datas if 'sequential', random otherwise. + shuffle: Whether to shuffle the data at the beginning of each epoch. """ def __init__(self, sequence, use_multiprocessing=False, - scheduling='sequential'): + shuffle=False): self.sequence = sequence self.use_multiprocessing = use_multiprocessing - self.scheduling = scheduling + self.shuffle = shuffle self.workers = 0 self.executor = None self.queue = None @@ -493,7 +500,7 @@ class OrderedEnqueuer(SequenceEnqueuer): """Submits requests to the executor and queues the `Future` objects.""" sequence = list(range(len(self.sequence))) while True: - if self.scheduling is not 'sequential': + if self.shuffle: random.shuffle(sequence) for i in sequence: if self.stop_signal.is_set(): @@ -501,6 +508,7 @@ class OrderedEnqueuer(SequenceEnqueuer): self.queue.put( self.executor.apply_async(get_index, (self.sequence, i)), block=True) + self.sequence.on_epoch_end() def get(self): """Creates a generator to extract data from the queue. diff --git a/tensorflow/contrib/keras/python/keras/utils/generic_utils.py b/tensorflow/contrib/keras/python/keras/utils/generic_utils.py index 3428476b17..39a10c8650 100644 --- a/tensorflow/contrib/keras/python/keras/utils/generic_utils.py +++ b/tensorflow/contrib/keras/python/keras/utils/generic_utils.py @@ -197,7 +197,8 @@ def func_dump(func): A tuple `(code, defaults, closure)`. """ if os.name == 'nt': - code = marshal.dumps(func.__code__).replace(b'\\',b'/').decode('raw_unicode_escape') + code = marshal.dumps( + func.__code__).replace(b'\\', b'/').decode('raw_unicode_escape') else: code = marshal.dumps(func.__code__).decode('raw_unicode_escape') defaults = func.__defaults__ @@ -331,7 +332,7 @@ class Progbar(object): for k in self.unique_values: info += ' - %s:' % k if isinstance(self.sum_values[k], list): - avg = self.sum_values[k][0] / max(1, self.sum_values[k][1]) + avg = np.mean(self.sum_values[k][0] / max(1, self.sum_values[k][1])) if abs(avg) > 1e-3: info += ' %.4f' % avg else: @@ -354,7 +355,7 @@ class Progbar(object): info = '%ds' % (now - self.start) for k in self.unique_values: info += ' - %s:' % k - avg = self.sum_values[k][0] / max(1, self.sum_values[k][1]) + avg = np.mean(self.sum_values[k][0] / max(1, self.sum_values[k][1])) if avg > 1e-3: info += ' %.4f' % avg else: diff --git a/tensorflow/contrib/keras/python/keras/utils/io_utils.py b/tensorflow/contrib/keras/python/keras/utils/io_utils.py index 70b2d96907..5f2ba99be7 100644 --- a/tensorflow/contrib/keras/python/keras/utils/io_utils.py +++ b/tensorflow/contrib/keras/python/keras/utils/io_utils.py @@ -89,7 +89,7 @@ class HDF5Matrix(object): idx = slice(start + self.start, stop + self.start) else: raise IndexError - elif isinstance(key, int): + elif isinstance(key, (int, np.integer)): if key + self.start < self.end: idx = key + self.start else: diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index 68e826ccd5..b223d7d887 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -95,7 +95,8 @@ do_pylint() { "^tensorflow/python/platform/default/_googletest\.py.*\[E0102.*function\salready\sdefined "\ "^tensorflow/python/feature_column/feature_column_test\.py.*\[E0110.*abstract-class-instantiated "\ "^tensorflow/contrib/layers/python/layers/feature_column\.py.*\[E0110.*abstract-class-instantiated "\ -"^tensorflow/python/platform/gfile\.py.*\[E0301.*non-iterator" +"^tensorflow/python/platform/gfile\.py.*\[E0301.*non-iterator "\ +"^tensorflow/contrib/keras/python/keras/callbacks\.py.*\[E1133.*not-an-iterable" echo "ERROR_WHITELIST=\"${ERROR_WHITELIST}\"" -- GitLab From ec7fe797ab827bbf0a57d74154b43d722b64edcf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 Aug 2017 17:14:33 -0700 Subject: [PATCH 008/294] Verify that TPU estimator node has a crossReplicaSum used during the training phase PiperOrigin-RevId: 166545767 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 2cbb118ed6..7c883ec926 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -55,6 +55,7 @@ from tensorflow.python.training import training _INITIAL_LOSS = 1e7 _ZERO_LOSS = 0. _BATCH_SIZE_KEY = 'batch_size' +_CROSS_REPLICA_SUM_OP = 'CrossReplicaSum' _RESERVED_PARAMS_KEYS = [_BATCH_SIZE_KEY] @@ -1284,6 +1285,10 @@ def _augment_model_fn(model_fn, train_batch_size, eval_batch_size, use_tpu, summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss) with ops.control_dependencies([loss]): update_ops = _sync_variables_ops() + + # Validate the TPU training graph to catch basic errors + _validate_tpu_training_graph() + return model_fn_lib.EstimatorSpec( mode, loss=loss, @@ -1353,10 +1358,28 @@ def _train_on_tpu_system(model_fn_wrapper, dequeue_fn): dequeue_fn) multi_tpu_train_steps_on_single_shard = (lambda: training_loop.repeat( # pylint: disable=g-long-lambda - iterations_per_loop, single_tpu_train_step, [_INITIAL_LOSS], name='loop')) + iterations_per_loop, single_tpu_train_step, [_INITIAL_LOSS], + name=b'loop')) (loss,) = tpu.shard(multi_tpu_train_steps_on_single_shard, inputs=[], num_shards=num_shards, outputs_from_all_shards=False) return loss + + +def _validate_tpu_training_graph(): + """Validate graph before running distributed training. + + Raises: + ValueError: If the graph seems invalid for running on device + """ + operations = ops.get_default_graph().get_operations() + + # Check if there is atleast one CrossReplicaSum operation in the graph + # This should be introduced by using the CrossShardOptimizer wrapper + cross_replica_sum_ops = [o for o in operations + if o.type == _CROSS_REPLICA_SUM_OP] + if not cross_replica_sum_ops: + raise ValueError( + 'CrossShardOptimizer must be used for model training on TPUs.') -- GitLab From 4359ad606c613e6491eb9e61fc32b5a24dba4b5e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 Aug 2017 17:15:37 -0700 Subject: [PATCH 009/294] Add support for eager mode to saver. PiperOrigin-RevId: 166545870 --- tensorflow/python/framework/test_util.py | 4 + .../resource_variable_ops_test.py | 2 + .../python/kernel_tests/variables_test.py | 2 + tensorflow/python/ops/variables.py | 12 + tensorflow/python/training/saver.py | 212 +++++++++++++----- tensorflow/python/training/saver_test.py | 170 ++++++++++---- .../python/training/saver_test_utils.py | 13 +- tensorflow/python/training/training_util.py | 3 + 8 files changed, 304 insertions(+), 114 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 8a24d10c59..b494b988b3 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -53,6 +53,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import versions from tensorflow.python.ops import array_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import googletest from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib @@ -490,6 +491,9 @@ class TensorFlowTestCase(googletest.TestCase): def _eval_helper(self, tensors): if isinstance(tensors, ops.EagerTensor): return tensors.numpy() + if isinstance(tensors, resource_variable_ops.ResourceVariable): + return tensors.read_value().numpy() + if isinstance(tensors, tuple): return tuple([self._eval_helper(t) for t in tensors]) elif isinstance(tensors, list): diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index e08130c103..cfb47c9bf3 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -315,6 +315,8 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): def testSharedNameWithNamescope(self): with ops.name_scope("foo"): v = resource_variable_ops.ResourceVariable(300.0, name="var6") + self.assertEqual("foo/var6", v._shared_name) # pylint: disable=protected-access + self.assertEqual("foo/var6:0", v.name) self.evaluate(variables.global_variables_initializer()) w = resource_variable_ops.var_handle_op( diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py index 414fb0c699..7718710c69 100644 --- a/tensorflow/python/kernel_tests/variables_test.py +++ b/tensorflow/python/kernel_tests/variables_test.py @@ -44,12 +44,14 @@ class VariablesTestCase(test.TestCase): with self.test_session(): var0 = variables.Variable(0.0) self.assertEqual("Variable:0", var0.name) + self.assertEqual("Variable", var0._shared_name) self.assertEqual([], var0.get_shape()) self.assertEqual([], var0.get_shape()) self.assertEqual([], var0.shape) var1 = variables.Variable(1.1) self.assertEqual("Variable_1:0", var1.name) + self.assertEqual("Variable_1", var1._shared_name) self.assertEqual([], var1.get_shape()) self.assertEqual([], var1.get_shape()) self.assertEqual([], var1.shape) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 33b9907dc6..2e49e452d0 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -857,6 +857,18 @@ class Variable(object): """The name of this variable.""" return self._variable.name + @property + def _shared_name(self): + """The shared name of the variable. + + Unlike name(), shared_name doesn't have ":0" suffix. It is user-specified + name with name scope prefix. + + Returns: + variable name. + """ + return self.name[:-2] + @property def initializer(self): """The initializer operation for this variable.""" diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index fb1ac0029a..dc9bd3a8c2 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -33,6 +33,7 @@ from google.protobuf import text_format from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.core.protobuf import saver_pb2 from tensorflow.python.client import session +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as pydev from tensorflow.python.framework import errors @@ -92,14 +93,18 @@ class BaseSaverBuilder(object): """Creates a `SaveSpec` object. Args: - tensor: the tensor to save. + tensor: the tensor to save or callable that produces a tensor to save. slice_spec: the slice to be saved. See `Variable.SaveSliceInfo`. name: the name to save the tensor under. """ - self.tensor = tensor + self._tensor = tensor self.slice_spec = slice_spec self.name = name + @property + def tensor(self): + return self._tensor() if callable(self._tensor) else self._tensor + class SaveableObject(object): """Base class for saving and restoring saveable objects.""" @@ -160,13 +165,15 @@ class BaseSaverBuilder(object): def __init__(self, var, slice_spec, name): if isinstance(var, ops.Tensor): self.handle_op = var.op.inputs[0] + tensor = var elif isinstance(var, resource_variable_ops.ResourceVariable): self.handle_op = var.handle + tensor = var.read_value else: raise ValueError( "Saveable is neither a resource variable nor a read operation." " Got: %s" % repr(var)) - spec = BaseSaverBuilder.SaveSpec(var, slice_spec, name) + spec = BaseSaverBuilder.SaveSpec(tensor, slice_spec, name) super(BaseSaverBuilder.ResourceVariableSaveable, self).__init__( var, [spec], name) @@ -521,17 +528,24 @@ class BaseSaverBuilder(object): else: names_to_saveables[name] = [var] else: - var = ops.internal_convert_to_tensor(var, as_ref=True) - if not BaseSaverBuilder._IsVariable(var): - raise TypeError("Variable to save is not a Variable: %s" % var) - if var.op.type == "ReadVariableOp": - name = var.op.inputs[0].op.name + if context.in_graph_mode(): + var = ops.internal_convert_to_tensor(var, as_ref=True) + if not BaseSaverBuilder._IsVariable(var): + raise TypeError("Variable to save is not a Variable: %s" % var) + if var.op.type == "ReadVariableOp": + name = var.op.inputs[0].op.name + else: + name = var.op.name + if name in names_to_saveables: + raise ValueError("At least two variables have the same name: %s" % + name) + names_to_saveables[name] = var else: - name = var.op.name - if name in names_to_saveables: - raise ValueError("At least two variables have the same name: %s" % - name) - names_to_saveables[name] = var + if not isinstance(var, resource_variable_ops.ResourceVariable): + raise ValueError("Can only save/restore ResourceVariable eager " + "mode is enabled, type: %s." % type(var)) + names_to_saveables[var._shared_name] = var + # pylint: enable=protected-access return names_to_saveables @@ -592,16 +606,23 @@ class BaseSaverBuilder(object): # pylint: enable=protected-access else: # A variable or tensor. - variable = ops.internal_convert_to_tensor(op, as_ref=True) - if not BaseSaverBuilder._IsVariable(variable): - raise TypeError("names_to_saveables must be a dict mapping string " - "names to Tensors/Variables. Not a variable: %s" % - variable) - if variable.op.type in ["Variable", "VariableV2", "AutoReloadVariable"]: - saveable = BaseSaverBuilder.VariableSaveable(variable, "", name) + if context.in_eager_mode(): + if not isinstance(op, resource_variable_ops.ResourceVariable): + raise ValueError("Can only save/restore ResourceVariable eager " + "mode is enabled, type: %s." % type(op)) + saveable = BaseSaverBuilder.ResourceVariableSaveable(op, "", name) else: - saveable = BaseSaverBuilder.ResourceVariableSaveable( - variable, "", name) + variable = ops.internal_convert_to_tensor(op, as_ref=True) + if not BaseSaverBuilder._IsVariable(variable): + raise TypeError("names_to_saveables must be a dict mapping string " + "names to Tensors/Variables. Not a variable: %s" % + variable) + if variable.op.type in ["Variable", "VariableV2", + "AutoReloadVariable"]: + saveable = BaseSaverBuilder.VariableSaveable(variable, "", name) + else: + saveable = BaseSaverBuilder.ResourceVariableSaveable( + variable, "", name) self._AddSaveable(saveables, seen_ops, saveable) return saveables @@ -632,7 +653,7 @@ class BaseSaverBuilder(object): name=None, restore_sequentially=False, filename="model"): - """Adds save/restore nodes to the graph and creates a SaverDef proto. + """Builds save/restore graph nodes or runs save/restore in eager mode. Args: names_to_saveables: A dictionary mapping name to a Variable or @@ -667,6 +688,31 @@ class BaseSaverBuilder(object): ValueError: If any of the keys or values in 'names_to_saveables' is not unique. """ + return self._build_internal( + names_to_saveables=names_to_saveables, + reshape=reshape, + sharded=sharded, + max_to_keep=max_to_keep, + keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, + name=name, + restore_sequentially=restore_sequentially, + filename=filename) + + def _build_internal(self, + names_to_saveables, + reshape=False, + sharded=False, + max_to_keep=5, + keep_checkpoint_every_n_hours=10000.0, + name=None, + restore_sequentially=False, + filename="model", + build_save=True, + build_restore=True): + """build() with option to only perform save and restore.""" + if context.in_graph_mode() and (not build_save or not build_restore): + raise ValueError("Graph mode needs to build save and restore together.") + saveables = self._ValidateAndSliceInputs(names_to_saveables) if max_to_keep is None: max_to_keep = 0 @@ -679,13 +725,17 @@ class BaseSaverBuilder(object): # Add the save ops. if sharded: per_device = self._GroupByDevices(saveables) - save_tensor = self._AddShardedSaveOps(filename_tensor, per_device) - restore_op = self._AddShardedRestoreOps(filename_tensor, per_device, - restore_sequentially, reshape) + if build_save: + save_tensor = self._AddShardedSaveOps(filename_tensor, per_device) + if build_restore: + restore_op = self._AddShardedRestoreOps(filename_tensor, per_device, + restore_sequentially, reshape) else: - save_tensor = self._AddSaveOps(filename_tensor, saveables) - restore_op = self._AddRestoreOps(filename_tensor, saveables, - restore_sequentially, reshape) + if build_save: + save_tensor = self._AddSaveOps(filename_tensor, saveables) + if build_restore: + restore_op = self._AddRestoreOps(filename_tensor, saveables, + restore_sequentially, reshape) # In the following use case, it's possible to have restore_ops be called # something else: @@ -698,15 +748,26 @@ class BaseSaverBuilder(object): # such usage model makes sense. # # assert restore_op.name.endswith("restore_all"), restore_op.name - - return saver_pb2.SaverDef( - filename_tensor_name=filename_tensor.name, - save_tensor_name=save_tensor.name, - restore_op_name=restore_op.name, - max_to_keep=max_to_keep, - sharded=sharded, - keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, - version=self._write_version) + if context.in_graph_mode(): + return saver_pb2.SaverDef( + filename_tensor_name=filename_tensor.name, + save_tensor_name=save_tensor.name, + restore_op_name=restore_op.name, + max_to_keep=max_to_keep, + sharded=sharded, + keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, + version=self._write_version) + else: + # Store the tensor values to the tensor_names. + save_tensor_name = save_tensor.numpy() if build_save else "" + return saver_pb2.SaverDef( + filename_tensor_name=filename_tensor.numpy(), + save_tensor_name=save_tensor_name, + restore_op_name="", + max_to_keep=max_to_keep, + sharded=sharded, + keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, + version=self._write_version) def _get_saver_or_default(): @@ -1136,7 +1197,7 @@ class Saver(object): self._write_version = write_version self._pad_step_number = pad_step_number self._filename = filename - if not defer_build: + if not defer_build and context.in_graph_mode(): self.build() if self.saver_def: self._check_saver_def() @@ -1144,11 +1205,22 @@ class Saver(object): self._save_relative_paths = save_relative_paths def build(self): + if context.in_eager_mode(): + raise ValueError("Use save/restore instead of build in eager mode.") + self._build(self._filename, build_save=True, build_restore=True) + + def _build_eager(self, checkpoint_path, build_save, build_restore): + self._build( + checkpoint_path, build_save=build_save, build_restore=build_restore) + + def _build(self, checkpoint_path, build_save, build_restore): """Builds saver_def.""" - if self._is_built: - return - self._is_built = True - if not self.saver_def: + if context.in_graph_mode(): + if self._is_built: + return + self._is_built = True + + if not self.saver_def or context.in_eager_mode(): if self._builder is None: self._builder = BaseSaverBuilder(self._write_version) if self._var_list is None: @@ -1161,7 +1233,8 @@ class Saver(object): else: raise ValueError("No variables to save") self._is_empty = False - self.saver_def = self._builder.build( + + self.saver_def = self._builder._build_internal( # pylint: disable=protected-access self._var_list, reshape=self._reshape, sharded=self._sharded, @@ -1169,7 +1242,8 @@ class Saver(object): keep_checkpoint_every_n_hours=self._keep_checkpoint_every_n_hours, name=self._name, restore_sequentially=self._restore_sequentially, - filename=self._filename) + filename=checkpoint_path, + build_save=build_save, build_restore=build_restore) elif self.saver_def and self._name: # Since self._name is used as a name_scope by builder(), we are # overloading the use of this field to represent the "import_scope" as @@ -1191,12 +1265,13 @@ class Saver(object): if not isinstance(self.saver_def, saver_pb2.SaverDef): raise ValueError("saver_def must be a saver_pb2.SaverDef: %s" % self.saver_def) - if not self.saver_def.save_tensor_name: - raise ValueError("saver_def must specify the save_tensor_name: %s" % - str(self.saver_def)) - if not self.saver_def.restore_op_name: - raise ValueError("saver_def must specify the restore_op_name: %s" % - str(self.saver_def)) + if context.in_graph_mode(): + if not self.saver_def.save_tensor_name: + raise ValueError("saver_def must specify the save_tensor_name: %s" % + str(self.saver_def)) + if not self.saver_def.restore_op_name: + raise ValueError("saver_def must specify the restore_op_name: %s" % + str(self.saver_def)) def _CheckpointFilename(self, p): """Returns the checkpoint filename given a `(filename, time)` pair. @@ -1402,7 +1477,7 @@ class Saver(object): path can be passed directly to a call to `restore()`. Args: - sess: A Session to use to save the variables. + sess: A Session to use to save the variables. None in eager mode. save_path: String. Path to the checkpoint filename. If the saver is `sharded`, this is the prefix of the sharded checkpoint filename. global_step: If provided the global step number is appended to @@ -1431,7 +1506,7 @@ class Saver(object): collides with `save_path`. RuntimeError: If save and restore ops weren't built. """ - if not self._is_built: + if not self._is_built and context.in_graph_mode(): raise RuntimeError( "`build()` should be called before save if defer_build==True") if latest_filename is None: @@ -1457,21 +1532,28 @@ class Saver(object): else: checkpoint_file = save_path if os.path.basename( - save_path) == latest_filename and not self.saver_def.sharded: + save_path) == latest_filename and not self._sharded: # Guard against collision between data file and checkpoint state file. raise ValueError( "'latest_filename' collides with 'save_path': '%s' and '%s'" % (latest_filename, save_path)) - if not isinstance(sess, session.SessionInterface): + if (context.in_graph_mode() and + not isinstance(sess, session.SessionInterface)): raise TypeError("'sess' must be a Session; %s" % sess) save_path_parent = os.path.dirname(save_path) if not self._is_empty: try: - model_checkpoint_path = sess.run( - self.saver_def.save_tensor_name, - {self.saver_def.filename_tensor_name: checkpoint_file}) + if context.in_graph_mode(): + model_checkpoint_path = sess.run( + self.saver_def.save_tensor_name, + {self.saver_def.filename_tensor_name: checkpoint_file}) + else: + self._build_eager( + checkpoint_file, build_save=True, build_restore=False) + model_checkpoint_path = self.saver_def.save_tensor_name + model_checkpoint_path = compat.as_str(model_checkpoint_path) if write_state: self._MaybeDeleteOldCheckpoints( @@ -1492,8 +1574,11 @@ class Saver(object): if write_meta_graph: meta_graph_filename = self._MetaGraphFilename( checkpoint_file, meta_graph_suffix=meta_graph_suffix) - with sess.graph.as_default(): + if context.in_eager_mode(): self.export_meta_graph(meta_graph_filename) + else: + with sess.graph.as_default(): + self.export_meta_graph(meta_graph_filename) if self._is_empty: return None @@ -1545,7 +1630,7 @@ class Saver(object): `save()` call, or a call to `latest_checkpoint()`. Args: - sess: A `Session` to use to restore the parameters. + sess: A `Session` to use to restore the parameters. None in eager mode. save_path: Path where parameters were previously saved. Raises: @@ -1556,8 +1641,11 @@ class Saver(object): if save_path is None: raise ValueError("Can't load save_path when it is None.") logging.info("Restoring parameters from %s", save_path) - sess.run(self.saver_def.restore_op_name, - {self.saver_def.filename_tensor_name: save_path}) + if context.in_graph_mode(): + sess.run(self.saver_def.restore_op_name, + {self.saver_def.filename_tensor_name: save_path}) + else: + self._build_eager(save_path, build_save=False, build_restore=True) @staticmethod def _add_collection_def(meta_graph_def, key, export_scope=None): diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 2d878e3057..35c5980818 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -38,6 +38,7 @@ from tensorflow.core.protobuf import queue_runner_pb2 from tensorflow.core.protobuf import saver_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.client import session +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -77,97 +78,170 @@ class SaverTest(test.TestCase): def basicSaveRestore(self, variable_op): save_path = os.path.join(self.get_temp_dir(), "basic_save_restore") - # Build a graph with 2 parameter nodes, and Save and - # Restore nodes for them. - v0 = variable_op(10.0, name="v0") - v1 = variable_op(20.0, name="v1") - v2 = saver_test_utils.CheckpointedOp(name="v2") - v2_init = v2.insert("k1", 30.0) - save = saver_module.Saver( - { - "v0": v0, - "v1": v1, - "v2": v2.saveable - }, restore_sequentially=True) - init_all_op = [variables.global_variables_initializer(), v2_init] + with self.test_session(graph=ops_lib.Graph()) as sess: + # Build a graph with 2 parameter nodes, and Save and + # Restore nodes for them. + v0 = variable_op(10.0, name="v0") + v1 = variable_op(20.0, name="v1") + v2 = saver_test_utils.CheckpointedOp(name="v2") + v2_init = v2.insert("k1", 30.0) - with self.test_session() as sess: # Initialize all variables - sess.run(init_all_op) + if context.in_graph_mode(): + self.evaluate([variables.global_variables_initializer(), v2_init]) - # Check that the parameter nodes have been initialized. - self.assertEqual(10.0, v0.eval()) - self.assertEqual(20.0, v1.eval()) - self.assertEqual(b"k1", v2.keys().eval()) - self.assertEqual(30.0, v2.values().eval()) + # Check that the parameter nodes have been initialized. + self.assertEqual(10.0, self.evaluate(v0)) + self.assertEqual(20.0, self.evaluate(v1)) + self.assertEqual(b"k1", self.evaluate(v2.keys())) + self.assertEqual(30.0, self.evaluate(v2.values())) # Save the initialized values in the file at "save_path" + save = saver_module.Saver( + { + "v0": v0, + "v1": v1, + "v2": v2.saveable + }, restore_sequentially=True) val = save.save(sess, save_path) self.assertTrue(isinstance(val, six.string_types)) self.assertEqual(save_path, val) # Start a second session. In that session the parameter nodes # have not been initialized either. - with self.test_session() as sess: + with self.test_session(graph=ops_lib.Graph()) as sess: v0 = variable_op(-1.0, name="v0") v1 = variable_op(-1.0, name="v1") v2 = saver_test_utils.CheckpointedOp(name="v2") - save = saver_module.Saver({"v0": v0, "v1": v1, "v2": v2.saveable}) # Assert that the variables are not initialized. - self.assertEqual( - len(variables.report_uninitialized_variables().eval()), 2) - self.assertEqual(0, len(v2.keys().eval())) - self.assertEqual(0, len(v2.values().eval())) - + if context.in_graph_mode(): + self.assertEqual( + len(variables.report_uninitialized_variables().eval()), 2) + self.assertEqual(0, len(v2.keys().eval())) + self.assertEqual(0, len(v2.values().eval())) # Restore the saved values in the parameter nodes. + save = saver_module.Saver({"v0": v0, "v1": v1, "v2": v2.saveable}) save.restore(sess, save_path) # Check that the parameter nodes have been restored. - self.assertEqual(10.0, v0.eval()) - self.assertEqual(20.0, v1.eval()) - self.assertEqual(b"k1", v2.keys().eval()) - self.assertEqual(30.0, v2.values().eval()) + self.assertEqual(10.0, self.evaluate(v0)) + self.assertEqual(20.0, self.evaluate(v1)) + self.assertEqual(b"k1", self.evaluate(v2.keys())) + self.assertEqual(30.0, self.evaluate(v2.values())) # Build another graph with 2 nodes, initialized # differently, and a Restore node for them. - with self.test_session() as sess: + with self.test_session(graph=ops_lib.Graph()) as sess: v0_2 = variable_op(1000.0, name="v0") v1_2 = variable_op(2000.0, name="v1") v2_2 = saver_test_utils.CheckpointedOp(name="v2") - save2 = saver_module.Saver({"v0": v0_2, "v1": v1_2, "v2": v2_2.saveable}) - v2_2.insert("k1000", 3000.0).run() - variables.global_variables_initializer().run() + v2_init = v2_2.insert("k1000", 3000.0) # Check that the parameter nodes have been initialized. - self.assertEqual(1000.0, v0_2.eval()) - self.assertEqual(2000.0, v1_2.eval()) - self.assertEqual(b"k1000", v2_2.keys().eval()) - self.assertEqual(3000.0, v2_2.values().eval()) + if context.in_graph_mode(): + init_all_op = [variables.global_variables_initializer(), v2_init] + self.evaluate(init_all_op) + # TODO(xpan): Why _mutable_hash_table_v2 doesn't create empty + # table as it claims in eager mode? + self.assertEqual(b"k1000", self.evaluate(v2_2.keys())) + self.assertEqual(3000.0, self.evaluate(v2_2.values())) + self.assertEqual(1000.0, self.evaluate(v0_2)) + self.assertEqual(2000.0, self.evaluate(v1_2)) + # Restore the values saved earlier in the parameter nodes. + save2 = saver_module.Saver({"v0": v0_2, "v1": v1_2, "v2": v2_2.saveable}) save2.restore(sess, save_path) # Check that the parameter nodes have been restored. - self.assertEqual(10.0, v0_2.eval()) - self.assertEqual(20.0, v1_2.eval()) - self.assertEqual(b"k1", v2_2.keys().eval()) - self.assertEqual(30.0, v2_2.values().eval()) + self.assertEqual(10.0, self.evaluate(v0_2)) + self.assertEqual(20.0, self.evaluate(v1_2)) + self.assertEqual(b"k1", self.evaluate(v2_2.keys())) + self.assertEqual(30.0, self.evaluate(v2_2.values())) def testBasic(self): self.basicSaveRestore(variables.Variable) + @test_util.run_in_graph_and_eager_modes() def testResourceBasic(self): self.basicSaveRestore(resource_variable_ops.ResourceVariable) + def testEagerBasic(self): + with context.eager_mode(): + ckpt_prefix = os.path.join(self.get_temp_dir(), "ckpt") + + v1 = resource_variable_ops.ResourceVariable(3.14, name="v1") + v2 = resource_variable_ops.ResourceVariable([1, 2], name="v2") + save = saver_module.Saver([v1, v2]) + save.save(None, ckpt_prefix) + + v1.assign(0.0) + v2.assign([0, 0]) + self.assertNear(0.0, self.evaluate(v1), 1e-5) + self.assertAllEqual([0, 0], self.evaluate(v2)) + + save.restore(None, ckpt_prefix) + self.assertNear(3.14, self.evaluate(v1), 1e-5) + self.assertAllEqual([1, 2], self.evaluate(v2)) + + def testEagerGraphCompatibility(self): + # Save from graph mode and restore from eager mode. + graph_ckpt_prefix = os.path.join(self.get_temp_dir(), "graph_ckpt") + with context.graph_mode(): + with self.test_session(graph=ops_lib.Graph()) as sess: + # Create a graph model and save the checkpoint. + w1 = resource_variable_ops.ResourceVariable(1.0, name="w1") + w2 = resource_variable_ops.ResourceVariable(2.0, name="w2") + graph_saver = saver_module.Saver([w1, w2]) + sess.run(variables.global_variables_initializer()) + graph_saver.save(sess, graph_ckpt_prefix) + + with context.eager_mode(): + ops_lib._default_graph_stack.reset() # pylint: disable=protected-access + ops_lib.reset_default_graph() + + w1 = resource_variable_ops.ResourceVariable(0.0, name="w1") + w2 = resource_variable_ops.ResourceVariable(0.0, name="w2") + + graph_saver = saver_module.Saver() + graph_saver.restore(None, graph_ckpt_prefix) + + self.assertAllEqual(self.evaluate(w1), 1.0) + self.assertAllEqual(self.evaluate(w2), 2.0) + + # Save from eager mode and restore from graph mode. + eager_ckpt_prefix = os.path.join(self.get_temp_dir(), "eager_ckpt") + with context.eager_mode(): + ops_lib._default_graph_stack.reset() # pylint: disable=protected-access + ops_lib.reset_default_graph() + + w3 = resource_variable_ops.ResourceVariable(3.0, name="w3") + w4 = resource_variable_ops.ResourceVariable(4.0, name="w4") + + graph_saver = saver_module.Saver() + graph_saver.save(None, eager_ckpt_prefix) + + with context.graph_mode(): + with self.test_session(graph=ops_lib.Graph()) as sess: + w3 = resource_variable_ops.ResourceVariable(0.0, name="w3") + w4 = resource_variable_ops.ResourceVariable(0.0, name="w4") + graph_saver = saver_module.Saver([w3, w4]) + sess.run(variables.global_variables_initializer()) + graph_saver.restore(sess, eager_ckpt_prefix) + self.assertAllEqual(w3.eval(), 3.0) + self.assertAllEqual(w4.eval(), 4.0) + + @test_util.run_in_graph_and_eager_modes() def testResourceSaveRestoreCachingDevice(self): save_path = os.path.join(self.get_temp_dir(), "resource_cache") - v = resource_variable_ops.ResourceVariable([1], caching_device="/cpu:0") - with self.test_session() as sess: - variables.global_variables_initializer().run() + with self.test_session(graph=ops_lib.Graph()) as sess: + v = resource_variable_ops.ResourceVariable([1], caching_device="/cpu:0") + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) save = saver_module.Saver() save.save(sess, save_path) - with self.test_session() as sess: + save2 = saver_module.Saver() save2.restore(sess, save_path) - self.assertEquals(v.eval(), [1]) + self.assertEquals(self.evaluate(v), [1]) def testSaveCopyRestoreWithSaveRelativePaths(self): """Save, copy checkpoint dir and restore from copied dir. diff --git a/tensorflow/python/training/saver_test_utils.py b/tensorflow/python/training/saver_test_utils.py index bcabb41304..44b06b357e 100644 --- a/tensorflow/python/training/saver_test_utils.py +++ b/tensorflow/python/training/saver_test_utils.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops as ops_lib from tensorflow.python.ops import gen_lookup_ops @@ -39,9 +40,10 @@ class CheckpointedOp(object): else: self.table_ref = table_ref self._name = name - self._saveable = CheckpointedOp.CustomSaveable(self, name) - ops_lib.add_to_collection(ops_lib.GraphKeys.SAVEABLE_OBJECTS, - self._saveable) + if context.in_graph_mode(): + self._saveable = CheckpointedOp.CustomSaveable(self, name) + ops_lib.add_to_collection(ops_lib.GraphKeys.SAVEABLE_OBJECTS, + self._saveable) @property def name(self): @@ -49,7 +51,10 @@ class CheckpointedOp(object): @property def saveable(self): - return self._saveable + if context.in_graph_mode(): + return self._saveable + else: + return CheckpointedOp.CustomSaveable(self, self.name) def insert(self, keys, values): return gen_lookup_ops._lookup_table_insert_v2(self.table_ref, keys, values) diff --git a/tensorflow/python/training/training_util.py b/tensorflow/python/training/training_util.py index a40f63ccb3..bf48c75997 100644 --- a/tensorflow/python/training/training_util.py +++ b/tensorflow/python/training/training_util.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_io from tensorflow.python.framework import ops @@ -55,6 +56,8 @@ def global_step(sess, global_step_tensor): Returns: The global step value. """ + if context.in_eager_mode(): + return int(global_step_tensor.numpy()) return int(sess.run(global_step_tensor)) -- GitLab From e2a32fbe51ff8f208cf04f0edc07bb6ef8f52546 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 Aug 2017 17:35:10 -0700 Subject: [PATCH 010/294] Update ops-related pbtxt files. PiperOrigin-RevId: 166547732 --- .../core/ops/compat/ops_history.v1.pbtxt | 32 +++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 36 +++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index dbca823a57..17343dc23d 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -27670,6 +27670,38 @@ op { } } } +op { + name: "SqlDataset" + input_arg { + name: "driver_name" + type: DT_STRING + } + input_arg { + name: "data_source_name" + type: DT_STRING + } + input_arg { + name: "query" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_RESOURCE + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} op { name: "Sqrt" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 640ca5591e..823dbd5b6b 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -27704,6 +27704,42 @@ op { } summary: "Splits a tensor into `num_split` tensors along one dimension." } +op { + name: "SqlDataset" + input_arg { + name: "driver_name" + description: "The database type. Currently, the only supported type is \'sqlite\'." + type: DT_STRING + } + input_arg { + name: "data_source_name" + description: "A connection string to connect to the database." + type: DT_STRING + } + input_arg { + name: "query" + description: "A SQL query to execute." + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_RESOURCE + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset that executes a SQL query and emits rows of the result set." + is_stateful: true +} op { name: "Sqrt" input_arg { -- GitLab From 554ad9abda7d42014c36df24c07bc5a9741ee6cb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 Aug 2017 17:41:54 -0700 Subject: [PATCH 011/294] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 166548278 --- tensorflow/go/op/wrappers.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index b0df85197d..25a164348f 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -5790,6 +5790,30 @@ func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf return op.Output(0) } +// Creates a dataset that executes a SQL query and emits rows of the result set. +// +// Arguments: +// driver_name: The database type. Currently, the only supported type is 'sqlite'. +// data_source_name: A connection string to connect to the database. +// query: A SQL query to execute. +// +// +func SqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "SqlDataset", + Input: []tf.Input{ + driver_name, data_source_name, query, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // PlaceholderAttr is an optional argument to Placeholder. type PlaceholderAttr func(optionalAttr) -- GitLab From 9a75ffd6668738244e22a73c3611d77c00d0a243 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 Aug 2017 18:14:39 -0700 Subject: [PATCH 012/294] Internal change. PiperOrigin-RevId: 166550940 --- tensorflow/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 566d8963d0..799f2a72d0 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -9,6 +9,9 @@ licenses(["notice"]) # Apache 2.0 exports_files([ "LICENSE", "ACKNOWLEDGMENTS", + # The leakr files are used by //third_party/cloud_tpu. + "leakr_badwords.dic", + "leakr_badfiles.dic", ]) # Config setting for determining if we are building for Android. -- GitLab From 17f26f81bfaf8ee03e330b98f4297cb754676c35 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 25 Aug 2017 18:39:23 -0700 Subject: [PATCH 013/294] Minor refactor of BatchNormalization's build, where we create gamma before beta. This fixes a major compatibility issue between tf.keras and multi-backend keras, in which the BN weights saved by one version would be incorrectly loaded by the other version. This change will not have side effects outside of Keras. PiperOrigin-RevId: 166552455 --- tensorflow/python/layers/normalization.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index dffbfc7290..62f5881f16 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -187,17 +187,6 @@ class BatchNormalization(base.Layer): self.input_spec = base.InputSpec(ndim=ndim, axes={self.axis: param_dim.value}) - if self.center: - self.beta = self.add_variable(name='beta', - shape=(param_dim,), - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint, - trainable=True) - else: - self.beta = None - if self.fused: - self._beta_const = array_ops.constant(0.0, shape=(param_dim,)) if self.scale: self.gamma = self.add_variable(name='gamma', shape=(param_dim,), @@ -210,6 +199,18 @@ class BatchNormalization(base.Layer): if self.fused: self._gamma_const = array_ops.constant(1.0, shape=(param_dim,)) + if self.center: + self.beta = self.add_variable(name='beta', + shape=(param_dim,), + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint, + trainable=True) + else: + self.beta = None + if self.fused: + self._beta_const = array_ops.constant(0.0, shape=(param_dim,)) + # Disable variable partitioning when creating the moving mean and variance try: if self._scope: -- GitLab From ea945f55ffef13c92a3065baa234a4dc58983ea3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 26 Aug 2017 00:00:13 -0700 Subject: [PATCH 014/294] Adding some comments explaining type and meaning of variables. PiperOrigin-RevId: 166564600 --- .../examples/tutorials/word2vec/word2vec_basic.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index d73b1c6373..6d98c7b85d 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -86,6 +86,12 @@ def build_dataset(words, n_words): reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys())) return data, count, dictionary, reversed_dictionary +# Filling 4 global variables: +# data - list of codes (integers from 0 to vocabulary_size-1). +# This is the original text but words are replaced by their codes +# count - map of words(strings) to count of occurences +# dictionary - map of words(strings) to their codes(integers) +# reverse_dictionary - maps codes(integers) to words(strings) data, count, dictionary, reverse_dictionary = build_dataset(vocabulary, vocabulary_size) del vocabulary # Hint to reduce memory. @@ -136,14 +142,16 @@ batch_size = 128 embedding_size = 128 # Dimension of the embedding vector. skip_window = 1 # How many words to consider left and right. num_skips = 2 # How many times to reuse an input to generate a label. +num_sampled = 64 # Number of negative examples to sample. # We pick a random validation set to sample nearest neighbors. Here we limit the # validation samples to the words that have a low numeric ID, which by -# construction are also the most frequent. +# construction are also the most frequent. These 3 variables are used only for +# displaying model accuracy, they don't affect calculation. valid_size = 16 # Random set of words to evaluate similarity on. valid_window = 100 # Only pick dev samples in the head of the distribution. valid_examples = np.random.choice(valid_window, valid_size, replace=False) -num_sampled = 64 # Number of negative examples to sample. + graph = tf.Graph() @@ -170,6 +178,8 @@ with graph.as_default(): # Compute the average NCE loss for the batch. # tf.nce_loss automatically draws a new sample of the negative labels each # time we evaluate the loss. + # Explanation of the meaning of NCE loss: + # http://mccormickml.com/2016/04/19/word2vec-tutorial-the-skip-gram-model/ loss = tf.reduce_mean( tf.nn.nce_loss(weights=nce_weights, biases=nce_biases, -- GitLab From feb4e648bf72ebc6e6dc377e95329a93821e5eba Mon Sep 17 00:00:00 2001 From: Anna R Date: Sat, 26 Aug 2017 18:52:14 -0700 Subject: [PATCH 015/294] Internal change. PiperOrigin-RevId: 166603016 --- tensorflow/contrib/learn/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index c2e74d1cc2..6fbe204ec6 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -488,6 +488,7 @@ py_test( name = "dynamic_rnn_estimator_test", size = "medium", srcs = ["python/learn/estimators/dynamic_rnn_estimator_test.py"], + shard_count = 4, srcs_version = "PY2AND3", deps = [ ":learn", -- GitLab From f938347aeba95afa55d5dd0d3f911689eff31821 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 27 Aug 2017 08:30:51 -0700 Subject: [PATCH 016/294] Minor bugfix to HloInstruction::MergeFusionInstruction, and allow InstructionFusion::Fuse to be overridden by derived classes. PiperOrigin-RevId: 166631382 --- .../compiler/xla/service/hlo_instruction.cc | 9 ++++++++- .../compiler/xla/service/instruction_fusion.cc | 15 ++++++++------- .../compiler/xla/service/instruction_fusion.h | 7 +++++-- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 28ca915310..f5a081a9dc 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -532,6 +532,8 @@ void HloInstruction::MergeFusionInstruction( HloInstruction* instruction_to_merge) { CHECK_EQ(opcode_, HloOpcode::kFusion); CHECK_EQ(instruction_to_merge->opcode(), HloOpcode::kFusion); + CHECK(std::find(operands().begin(), operands().end(), instruction_to_merge) != + operands().end()); // Clone the instruction from which to merge fused instructions. std::unique_ptr clone = instruction_to_merge->Clone(); // Replace uses of fused parameters with the corresponding operand of the @@ -563,6 +565,11 @@ void HloInstruction::MergeFusionInstruction( } CHECK_EQ(0, clone->user_count()); clone->DetachFromOperands(); + + if (GetModule()) { + TF_CHECK_OK(GetModule()->RemoveEmbeddedComputation( + clone->fused_instructions_computation())); + } } void HloInstruction::MergeFusionInstructionIntoMultiOutput( @@ -2131,6 +2138,7 @@ using DFSStack = // cycle was detected, and true otherwise. inline bool PushDFSChild(DfsHloVisitor* visitor, DFSStack* dfs_stack, HloInstruction* child) { + CHECK(child != nullptr); const int id = child->unique_id(); CHECK_GE(id, 0) << "instruction may not have a parent computation"; switch (visitor->GetVisitState(id)) { @@ -2193,7 +2201,6 @@ static Status PostOrderDFS(HloInstruction* root, DfsHloVisitor* visitor, visitor->SetVisitState(current_id, DfsHloVisitor::kVisiting); const size_t old_dfs_stack_size = dfs_stack.size(); - for (HloInstruction* child : current_node->operands()) { if (!TF_PREDICT_TRUE(PushDFSChild(visitor, &dfs_stack, child))) { return FailedPrecondition( diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index edfcb0922d..d449c637b5 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -212,7 +212,7 @@ bool InstructionFusion::CanFuseOnAllPaths( StatusOr InstructionFusion::Run(HloModule* module) { bool changed = false; - + module_ = module; std::vector computations; for (auto& computation : module->computations()) { if (computation->IsFusionComputation()) { @@ -395,7 +395,6 @@ HloInstruction* InstructionFusion::Fuse(HloInstruction* producer, VLOG(2) << "Fusing " << producer->ToString() << " into " << consumer->ToString(); - auto kind = ChooseKind(producer, consumer); if (consumer->opcode() == HloOpcode::kFusion) { fusion_instruction = consumer; @@ -407,8 +406,8 @@ HloInstruction* InstructionFusion::Fuse(HloInstruction* producer, HloInstruction::CreateFusion(consumer->shape(), kind, consumer)); TF_CHECK_OK(computation_->ReplaceInstruction(consumer, fusion_instruction)); } - fusion_instruction->FuseInstruction(producer); + fusion_instruction->FuseInstruction(producer); return fusion_instruction; } @@ -423,13 +422,15 @@ bool InstructionFusion::ShouldFuse(HloInstruction* consumer, if (consumer->opcode() == HloOpcode::kFusion && consumer->fusion_kind() != HloInstruction::FusionKind::kLoop && - consumer->fusion_kind() != HloInstruction::FusionKind::kInput) { + consumer->fusion_kind() != HloInstruction::FusionKind::kInput && + consumer->fusion_kind() != HloInstruction::FusionKind::kOutput) { return false; } - // Cost condition: not fuse (expensive producers) and (consumers who reuse - // operand elements). - if (consumer->ReusesOperandElements(operand_index) && + // Cost condition: not fuse (simple, expensive producers) and (consumers who + // reuse operand elements). + if (producer->opcode() != HloOpcode::kFusion && + consumer->ReusesOperandElements(operand_index) && is_expensive_(*producer)) { return false; } diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h index f6f37bb79b..3ac13ffda0 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.h +++ b/tensorflow/compiler/xla/service/instruction_fusion.h @@ -66,12 +66,15 @@ class InstructionFusion : public HloPassInterface { virtual HloInstruction::FusionKind ChooseKind(const HloInstruction* producer, const HloInstruction* consumer); + // Fuses producer into consumer. + virtual HloInstruction* Fuse(HloInstruction* producer, + HloInstruction* consumer); + // Current HloComputation instance the loop fuser is traversing. HloComputation* computation_; + HloModule* module_; private: - HloInstruction* Fuse(HloInstruction* producer, HloInstruction* consumer); - // The set of producers whose consumers we cannot fuse into. using DoNotFuseSet = std::unordered_set; -- GitLab From b132f5b0d82c4ae35f48607485c8be8a26ea4c00 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 27 Aug 2017 18:15:16 -0700 Subject: [PATCH 017/294] [tensorflow] Add super(x, self).setUp() to `losses_test.py`. Without this, graphs aren't cleared between tests. PiperOrigin-RevId: 166651283 --- tensorflow/python/kernel_tests/losses_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py index d081d26679..da57f918ac 100644 --- a/tensorflow/python/kernel_tests/losses_test.py +++ b/tensorflow/python/kernel_tests/losses_test.py @@ -40,6 +40,7 @@ from tensorflow.python.training import momentum as momentum_lib class AbsoluteDifferenceLossTest(test.TestCase): def setUp(self): + super(AbsoluteDifferenceLossTest, self).setUp() self._predictions = constant_op.constant([4, 8, 12, 8, 1, 3], shape=(2, 3)) self._labels = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) @@ -608,6 +609,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase): class LogLossTest(test.TestCase): def setUp(self): + super(LogLossTest, self).setUp() predictions = np.asarray([.9, .2, .2, .8, .4, .6]).reshape((2, 3)) labels = np.asarray([1.0, 0.0, 1.0, 1.0, 0.0, 0.0]).reshape((2, 3)) @@ -868,6 +870,7 @@ class HuberLossTest(test.TestCase): class MeanSquaredErrorTest(test.TestCase): def setUp(self): + super(MeanSquaredErrorTest, self).setUp() self._predictions = constant_op.constant([4, 8, 12, 8, 1, 3], shape=(2, 3)) self._labels = constant_op.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) @@ -941,6 +944,7 @@ class MeanSquaredErrorTest(test.TestCase): class MeanPairwiseSquaredErrorTest(test.TestCase): def setUp(self): + super(MeanPairwiseSquaredErrorTest, self).setUp() self._predictions = np.array([[4, 8, 12], [8, 1, 3]]) self._labels = np.array([[1, 9, 2], [-5, -5, 7]]) @@ -1167,6 +1171,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): class CosineDistanceLossTest(test.TestCase): def setUp(self): + super(CosineDistanceLossTest, self).setUp() self._predictions = np.asarray([ [1, 0, 0], # Batch 1 [0, 0, -1], @@ -1290,6 +1295,7 @@ class AddLossTest(test.TestCase): class ComputeWeightedLossTest(test.TestCase): def setUp(self): + super(ComputeWeightedLossTest, self).setUp() self._shape = (3, 2, 4) raw_losses = np.zeros(self._shape) next_loss = 0.0 -- GitLab From 11698cc8e157eefe71a60931f1e721ad327e08af Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Mon, 28 Aug 2017 10:06:06 -0700 Subject: [PATCH 018/294] Verify the output shape of HLO instructions in the HloVerifier. This change adds verification for some but not all instruction types. To support this verification, add HLO-level methods to ShapeInference. These methods will also be useful to automatically infer shape in the HloInstruction::Create* methods. This CL also fixes some tests and transformations with malformed instructions found by the verifier. PiperOrigin-RevId: 166718979 --- tensorflow/compiler/xla/service/BUILD | 4 + .../xla/service/algebraic_simplifier.cc | 39 ++- .../xla/service/algebraic_simplifier_test.cc | 15 +- .../compiler/xla/service/gpu/gpu_compiler.cc | 4 + .../compiler/xla/service/hlo_instruction.cc | 25 +- .../compiler/xla/service/hlo_pass_pipeline.cc | 1 + .../compiler/xla/service/hlo_verifier.cc | 259 ++++++++++++++++++ .../compiler/xla/service/shape_inference.cc | 190 +++++++++++-- .../compiler/xla/service/shape_inference.h | 26 +- tensorflow/compiler/xla/tests/fusion_test.cc | 22 +- .../xla/tests/multioutput_fusion_test.cc | 4 +- tensorflow/examples/speech_commands/freeze.py | 4 +- .../generate_streaming_test_wav.py | 4 +- 13 files changed, 523 insertions(+), 74 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index f7605cf35e..fc639ec573 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -41,6 +41,7 @@ cc_library( srcs = ["shape_inference.cc"], hdrs = ["shape_inference.h"], deps = [ + ":hlo", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", @@ -1008,6 +1009,7 @@ cc_library( ":hlo", ":hlo_pass", ":hlo_query", + ":hlo_verifier", ":shape_inference", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", @@ -1604,6 +1606,8 @@ cc_library( hdrs = ["hlo_verifier.h"], deps = [ ":hlo_pass", + ":shape_inference", + "//tensorflow/compiler/xla:status_macros", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 59cf08be47..d3649a1ed1 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_query.h" +#include "tensorflow/compiler/xla/service/hlo_verifier.h" #include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -239,9 +240,9 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { Status ReplaceWithNewInstruction( HloInstruction* old_instruction, std::unique_ptr new_instruction) { - VLOG(4) << "Replacing instruction:"; - VLOG(4) << " old: " << old_instruction->ToString(); - VLOG(4) << " new: " << new_instruction->ToString(); + VLOG(3) << "Replacing instruction:"; + VLOG(3) << " old: " << old_instruction->ToString(); + VLOG(3) << " new: " << new_instruction->ToString(); TF_RETURN_IF_ERROR(computation_->ReplaceWithNewInstruction( old_instruction, std::move(new_instruction))); changed_ = true; @@ -253,9 +254,9 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { // Returns the Status representing the result of the replace operation. Status ReplaceInstruction(HloInstruction* old_instruction, HloInstruction* new_instruction) { - VLOG(4) << "Replacing instruction:"; - VLOG(4) << " old: " << old_instruction->ToString(); - VLOG(4) << " new: " << new_instruction->ToString(); + VLOG(3) << "Replacing instruction:"; + VLOG(3) << " old: " << old_instruction->ToString(); + VLOG(3) << " new: " << new_instruction->ToString(); TF_RETURN_IF_ERROR( computation_->ReplaceInstruction(old_instruction, new_instruction)); changed_ = true; @@ -514,11 +515,19 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide, // (A / B) / (C / D) => (A / B)*(D / C) => (A * D) / (B * C) if (lhs->opcode() == HloOpcode::kDivide && rhs->opcode() == HloOpcode::kDivide) { + TF_ASSIGN_OR_RETURN( + const Shape a_times_d_shape, + ShapeInference::InferBinaryOpShape(HloOpcode::kMultiply, + lhs->operand(0), rhs->operand(1))); auto a_times_d = computation_->AddInstruction(HloInstruction::CreateBinary( - divide->shape(), HloOpcode::kMultiply, lhs->mutable_operand(0), + a_times_d_shape, HloOpcode::kMultiply, lhs->mutable_operand(0), rhs->mutable_operand(1))); + TF_ASSIGN_OR_RETURN( + const Shape b_times_c_shape, + ShapeInference::InferBinaryOpShape(HloOpcode::kMultiply, + lhs->operand(1), rhs->operand(0))); auto b_times_c = computation_->AddInstruction(HloInstruction::CreateBinary( - divide->shape(), HloOpcode::kMultiply, lhs->mutable_operand(1), + b_times_c_shape, HloOpcode::kMultiply, lhs->mutable_operand(1), rhs->mutable_operand(0))); return ReplaceWithNewInstruction( divide, HloInstruction::CreateBinary( @@ -527,8 +536,11 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide, // (A / B) / C => A / (B * C) if (lhs->opcode() == HloOpcode::kDivide) { + TF_ASSIGN_OR_RETURN(const Shape b_times_c_shape, + ShapeInference::InferBinaryOpShape( + HloOpcode::kMultiply, lhs->operand(1), rhs)); auto b_times_c = computation_->AddInstruction(HloInstruction::CreateBinary( - divide->shape(), HloOpcode::kMultiply, lhs->mutable_operand(1), rhs)); + b_times_c_shape, HloOpcode::kMultiply, lhs->mutable_operand(1), rhs)); return ReplaceWithNewInstruction( divide, HloInstruction::CreateBinary(divide->shape(), HloOpcode::kDivide, @@ -537,8 +549,11 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide, // A / (B / C) => (A*C) / B if (rhs->opcode() == HloOpcode::kDivide) { + TF_ASSIGN_OR_RETURN(const Shape a_times_c_shape, + ShapeInference::InferBinaryOpShape( + HloOpcode::kMultiply, lhs, rhs->operand(1))); auto a_times_c = computation_->AddInstruction(HloInstruction::CreateBinary( - divide->shape(), HloOpcode::kMultiply, lhs, rhs->mutable_operand(1))); + a_times_c_shape, HloOpcode::kMultiply, lhs, rhs->mutable_operand(1))); return ReplaceWithNewInstruction( divide, HloInstruction::CreateBinary(divide->shape(), HloOpcode::kDivide, @@ -1617,6 +1632,10 @@ StatusOr AlgebraicSimplifier::Run(HloModule* module) { } XLA_VLOG_LINES(2, "AlgebraicSimplifier::Run(), after:\n" + module->ToString()); + + HloVerifier verifier; + TF_DCHECK_OK(verifier.Run(module).status()); + return changed; } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index be71e03e98..c442e2d0bc 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -199,21 +199,22 @@ TEST_F(AlgebraicSimplifierTest, RhsDivOfDiv) { // Test that (A/B)/(C/D) is simplified to (A*D)/(B*C). TEST_F(AlgebraicSimplifierTest, DivOfDivAndDiv) { Shape r0f32 = ShapeUtil::MakeShape(F32, {}); + Shape r2f32 = ShapeUtil::MakeShape(F32, {42, 123}); HloComputation::Builder builder(TestName()); HloInstruction* param0 = builder.AddInstruction( HloInstruction::CreateParameter(0, r0f32, "param0")); HloInstruction* param1 = builder.AddInstruction( - HloInstruction::CreateParameter(1, r0f32, "param1")); + HloInstruction::CreateParameter(1, r2f32, "param1")); HloInstruction* param2 = builder.AddInstruction( - HloInstruction::CreateParameter(2, r0f32, "param2")); + HloInstruction::CreateParameter(2, r2f32, "param2")); HloInstruction* param3 = builder.AddInstruction( HloInstruction::CreateParameter(3, r0f32, "param3")); HloInstruction* div0 = builder.AddInstruction( - HloInstruction::CreateBinary(r0f32, HloOpcode::kDivide, param0, param1)); + HloInstruction::CreateBinary(r2f32, HloOpcode::kDivide, param0, param1)); HloInstruction* div1 = builder.AddInstruction( - HloInstruction::CreateBinary(r0f32, HloOpcode::kDivide, param2, param3)); + HloInstruction::CreateBinary(r2f32, HloOpcode::kDivide, param2, param3)); builder.AddInstruction( - HloInstruction::CreateBinary(r0f32, HloOpcode::kDivide, div0, div1)); + HloInstruction::CreateBinary(r2f32, HloOpcode::kDivide, div0, div1)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); @@ -229,6 +230,8 @@ TEST_F(AlgebraicSimplifierTest, DivOfDivAndDiv) { EXPECT_THAT( computation->root_instruction(), op::Divide(op::Multiply(param0, param3), op::Multiply(param1, param2))); + EXPECT_TRUE( + ShapeUtil::Compatible(computation->root_instruction()->shape(), r2f32)); } // Test that A/exp(B) is simplified to A*exp(-B). @@ -995,7 +998,7 @@ TEST_F(AlgebraicSimplifierTest, ReshapeToScalarNotHoistedAfterEffectiveUnary) { HloInstruction* zero = builder.AddInstruction( HloInstruction::CreateConstant(Literal::CreateR1({1., 2., 3.}))); builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {}), HloOpcode::kMaximum, reshape, zero)); + ShapeUtil::MakeShape(F32, {3}), HloOpcode::kMaximum, reshape, zero)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index dd51c11d5e..70f2b573be 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -131,6 +131,8 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module, { auto& pass = pipeline.AddPass>("simplification"); + pass.AddInvariantChecker(); + // TODO(b/62764704): Do not rewrite on GPU, use cuDNN's BatchNorm APIs // instead. pass.AddPass( @@ -158,12 +160,14 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module, } { HloPassFix fusion("fusion"); + fusion.AddInvariantChecker(); fusion.AddPass(/*may_duplicate=*/false); fusion.AddPass(/*may_duplicate=*/true); fusion.AddPass(); TF_RETURN_IF_ERROR(fusion.Run(hlo_module).status()); HloPassPipeline reduce_pipeline("reduce-precision"); + reduce_pipeline.AddInvariantChecker(); ReducePrecisionInsertion::AddPasses( &reduce_pipeline, hlo_module->config().debug_options(), HloReducePrecisionOptions::AFTER_OP_FUSION); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index f5a081a9dc..237e745383 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -537,10 +537,8 @@ void HloInstruction::MergeFusionInstruction( // Clone the instruction from which to merge fused instructions. std::unique_ptr clone = instruction_to_merge->Clone(); // Replace uses of fused parameters with the corresponding operand of the - // fusion. - // Add all non-parameter fused instructions to 'unfused_instructions' to be - // merged into 'this'. - // This is done in reverse post order. + // fusion. Add all non-parameter fused instructions to 'unfused_instructions' + // to be merged into 'this'. This is done in reverse post order. std::vector unfused_instructions; auto fused_instructions = clone->fused_instructions_computation()->MakeInstructionPostOrder(); @@ -565,11 +563,8 @@ void HloInstruction::MergeFusionInstruction( } CHECK_EQ(0, clone->user_count()); clone->DetachFromOperands(); - - if (GetModule()) { - TF_CHECK_OK(GetModule()->RemoveEmbeddedComputation( - clone->fused_instructions_computation())); - } + TF_CHECK_OK(parent()->parent()->RemoveEmbeddedComputation( + clone->fused_instructions_computation())); } void HloInstruction::MergeFusionInstructionIntoMultiOutput( @@ -649,9 +644,7 @@ HloInstruction* HloInstruction::CloneAndFuseInternal( HloInstruction* instruction_to_fuse, bool add_output) { CHECK_EQ(opcode_, HloOpcode::kFusion); CHECK(instruction_to_fuse->IsFusable()); - if (GetModule()) { - XLA_VLOG_LINES(3, GetModule()->ToString()); - } + VLOG(3) << "CloneAndFuseInternal:\n" << instruction_to_fuse->ToString(); HloInstruction* clone = nullptr; if (called_computations_.empty()) { // New fusion instruction. It should not be a multioutput instruction. @@ -814,6 +807,7 @@ HloInstruction* HloInstruction::CloneAndFuseInternal( called_computations_.push_back(computation); } } + VLOG(2) << "New clone:\n" << clone->ToString(); return clone; } @@ -955,6 +949,12 @@ void HloInstruction::CheckFusionInstruction() const { std::unique_ptr HloInstruction::CloneWithNewOperands( const Shape& shape, tensorflow::gtl::ArraySlice new_operands) { + VLOG(3) << "CloneWithNewOperands:\n " << ToString(); + VLOG(3) << " new operands:"; + for (const HloInstruction* new_operand : new_operands) { + VLOG(3) << " " << new_operand->name(); + } + // Explicitly call the factory for the instruction type. This is more robust // in the face of code changes than copying fields explicitly. This also // properly sets the user fields of the operands. @@ -1561,6 +1561,7 @@ Status HloInstruction::ReplaceAllUsesWith(HloInstruction* new_producer) { } void HloInstruction::DetachFromOperands() { + VLOG(3) << "DetachFromOperands:\n " << ToString(); CHECK_EQ(0, user_count()); // An instruction may be repeated as an operand. To avoid calling RemoveUser // twice on the same operand, keep a set of already detached operands. diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc index 4b824f8240..eb3da111a2 100644 --- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc +++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc @@ -56,6 +56,7 @@ StatusOr HloPassPipeline::Run(HloModule* module) { auto run_invariant_checkers = [this, module]() -> Status { for (auto& invariant_checker : invariant_checkers_) { + VLOG(1) << " Invariant checker " << invariant_checker->name(); TF_ASSIGN_OR_RETURN(bool changed, invariant_checker->Run(module)); TF_RET_CHECK(!changed) << "invariant checkers must not change the graph"; } diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 01fba49bc5..29ef237bc0 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -14,12 +14,269 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/hlo_verifier.h" +#include "tensorflow/compiler/xla/service/shape_inference.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/flatmap.h" namespace xla { +namespace { + +// Visitor which verifies that the output shape is correctly set. Verifies +// against the inferred shape for the instruction. +// TODO(b/26024837): Check output shape for all instruction types. +class ShapeVerifier : public DfsHloVisitor { + public: + Status HandleElementwiseUnary(HloInstruction* hlo, + HloOpcode opcode) override { + return CheckUnaryShape(hlo); + } + + Status HandleElementwiseBinary(HloInstruction* hlo, + HloOpcode opcode) override { + return CheckBinaryShape(hlo); + } + + Status HandleClamp(HloInstruction* clamp, HloInstruction* min, + HloInstruction* arg, HloInstruction* max) override { + return CheckTernaryShape(clamp); + } + + Status HandleSelect(HloInstruction* select, HloInstruction* pred, + HloInstruction* on_true, + HloInstruction* on_false) override { + return CheckTernaryShape(select); + } + + Status HandleConcatenate( + HloInstruction* concatenate, + tensorflow::gtl::ArraySlice operands) override { + return tensorflow::Status::OK(); + } + + Status HandleConvert(HloInstruction* convert) override { + return tensorflow::Status::OK(); + } + + Status HandleCopy(HloInstruction* copy) override { + return CheckUnaryShape(copy); + } + + Status HandleDot(HloInstruction* dot, HloInstruction* lhs, + HloInstruction* rhs) override { + return CheckBinaryShape(dot); + } + + Status HandleConvolution(HloInstruction* convolution, HloInstruction* lhs, + HloInstruction* rhs, const Window& window) override { + return tensorflow::Status::OK(); + } + + Status HandleCrossReplicaSum(HloInstruction* crs) override { + return tensorflow::Status::OK(); + } + + Status HandleReducePrecision(HloInstruction* reduce_precision) override { + return tensorflow::Status::OK(); + } + + Status HandleInfeed(HloInstruction* infeed) override { + return tensorflow::Status::OK(); + } + + Status HandleOutfeed(HloInstruction* outfeed) override { + return tensorflow::Status::OK(); + } + + Status HandleRng(HloInstruction* random, + RandomDistribution distribution) override { + return tensorflow::Status::OK(); + } + + Status HandleReverse(HloInstruction* reverse, + HloInstruction* operand) override { + return tensorflow::Status::OK(); + } + + Status HandleSort(HloInstruction* sort, HloInstruction* operand) override { + return tensorflow::Status::OK(); + } + + Status HandleConstant(HloInstruction* constant, + const Literal& literal) override { + return tensorflow::Status::OK(); + } + + Status HandleGetTupleElement(HloInstruction* get_tuple_element, + HloInstruction* operand) override { + return tensorflow::Status::OK(); + } + + Status HandleReduce(HloInstruction* reduce, HloInstruction* arg, + HloInstruction* init_value, + tensorflow::gtl::ArraySlice dimensions, + HloComputation* function) override { + return tensorflow::Status::OK(); + } + + Status HandleBitcast(HloInstruction* bitcast) override { + return tensorflow::Status::OK(); + } + + Status HandleBroadcast(HloInstruction* broadcast) override { + return tensorflow::Status::OK(); + } + + Status HandleReshape(HloInstruction* reshape) override { + return tensorflow::Status::OK(); + } + + Status HandleTranspose(HloInstruction* transpose) override { + return tensorflow::Status::OK(); + } + + Status HandleParameter(HloInstruction* parameter) override { + return tensorflow::Status::OK(); + } + + Status HandleFusion(HloInstruction* fusion) override { + return tensorflow::Status::OK(); + } + + Status HandleCall(HloInstruction* call) override { + return tensorflow::Status::OK(); + } + + Status HandleCustomCall(HloInstruction* custom_call, + tensorflow::gtl::ArraySlice operands, + tensorflow::StringPiece custom_call_target) override { + return tensorflow::Status::OK(); + } + + Status HandleSlice(HloInstruction* slice, HloInstruction* operand) override { + return tensorflow::Status::OK(); + } + + Status HandleDynamicSlice(HloInstruction* dynamic_slice, + HloInstruction* operand, + HloInstruction* start_indices) override { + return tensorflow::Status::OK(); + } + + Status HandleDynamicUpdateSlice(HloInstruction* dynamic_update_slice, + HloInstruction* operand, + HloInstruction* update, + HloInstruction* start_indices) override { + return tensorflow::Status::OK(); + } + + Status HandleTuple( + HloInstruction* tuple, + tensorflow::gtl::ArraySlice operands) override { + return CheckVariadicShape(tuple); + } + + Status HandleMap( + HloInstruction* map, + tensorflow::gtl::ArraySlice operands, + HloComputation* function, + tensorflow::gtl::ArraySlice static_operands) override { + return tensorflow::Status::OK(); + } + + Status HandleReduceWindow(HloInstruction* reduce_window, + HloInstruction* operand, const Window& window, + HloComputation* function) override { + return tensorflow::Status::OK(); + } + + Status HandleSelectAndScatter(HloInstruction* instruction) override { + return tensorflow::Status::OK(); + } + + Status HandleWhile(HloInstruction* xla_while) override { + return tensorflow::Status::OK(); + } + + Status HandlePad(HloInstruction* pad) override { + return tensorflow::Status::OK(); + } + + Status HandleSend(HloInstruction* send) override { + return tensorflow::Status::OK(); + } + + Status HandleRecv(HloInstruction* recv) override { + return tensorflow::Status::OK(); + } + + Status HandleBatchNormTraining(HloInstruction* batchNormTraining) override { + return tensorflow::Status::OK(); + } + + Status HandleBatchNormInference(HloInstruction* batchNormInference) override { + return tensorflow::Status::OK(); + } + + Status HandleBatchNormGrad(HloInstruction* batchNormGrad) override { + return tensorflow::Status::OK(); + } + + Status FinishVisit(HloInstruction* root) override { + return tensorflow::Status::OK(); + } + + private: + // Check the instruction's shape against the given expected shape and return + // an appropriate error if there is a mismatch. + Status CheckShape(const HloInstruction* instruction, + const Shape& expected_shape) { + if (!ShapeUtil::Compatible(instruction->shape(), expected_shape)) { + return InvalidArgument( + "Expected instruction to have shape compatible with %s, actual " + "shape is %s:\n%s", + ShapeUtil::HumanString(expected_shape).c_str(), + ShapeUtil::HumanString(instruction->shape()).c_str(), + instruction->ToString().c_str()); + } + return tensorflow::Status::OK(); + } + + // Check a unary (binary, etc) instruction's shape against the inferred shape. + Status CheckUnaryShape(const HloInstruction* instruction) { + TF_ASSIGN_OR_RETURN(const Shape expected, + ShapeInference::InferUnaryOpShape( + instruction->opcode(), instruction->operand(0))); + return CheckShape(instruction, expected); + } + Status CheckBinaryShape(const HloInstruction* instruction) { + TF_ASSIGN_OR_RETURN(const Shape expected, + ShapeInference::InferBinaryOpShape( + instruction->opcode(), instruction->operand(0), + instruction->operand(1))); + return CheckShape(instruction, expected); + } + Status CheckTernaryShape(const HloInstruction* instruction) { + TF_ASSIGN_OR_RETURN(const Shape expected, + ShapeInference::InferTernaryOpShape( + instruction->opcode(), instruction->operand(0), + instruction->operand(1), instruction->operand(2))); + return CheckShape(instruction, expected); + } + Status CheckVariadicShape(const HloInstruction* instruction) { + TF_ASSIGN_OR_RETURN(const Shape expected, + ShapeInference::InferVariadicOpShape( + instruction->opcode(), instruction->operands())); + return CheckShape(instruction, expected); + } +}; + +} // namespace + StatusOr HloVerifier::Run(HloModule* module) { tensorflow::gtl::FlatMap instructions; + ShapeVerifier shape_verifier; for (auto& computation : module->computations()) { for (const auto& instruction : computation->instructions()) { @@ -44,6 +301,8 @@ StatusOr HloVerifier::Run(HloModule* module) { << " in computation: " << previous->second->parent()->name(); instructions[instruction->name()] = instruction.get(); } + + TF_RETURN_IF_ERROR(computation->Accept(&shape_verifier)); } return false; diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 8eeb1cd5d2..1a24c6c493 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -39,6 +39,112 @@ namespace xla { namespace { +// Return the UnaryOperation proto enum value associated with the given HLO +// opcode. +UnaryOperation OpcodeToUnaryOperation(HloOpcode opcode) { + switch (opcode) { + case HloOpcode::kAbs: + return UNOP_ABS; + case HloOpcode::kCeil: + return UNOP_CEIL; + case HloOpcode::kCos: + return UNOP_COS; + case HloOpcode::kExp: + return UNOP_EXP; + case HloOpcode::kFloor: + return UNOP_FLOOR; + case HloOpcode::kIsFinite: + return UNOP_IS_FINITE; + case HloOpcode::kLog: + return UNOP_LOG; + case HloOpcode::kLogicalNot: + return UNOP_LOGICAL_NOT; + case HloOpcode::kNegate: + return UNOP_NEGATE; + case HloOpcode::kSign: + return UNOP_SIGN; + case HloOpcode::kSin: + return UNOP_SIN; + case HloOpcode::kSort: + return UNOP_SORT; + case HloOpcode::kTanh: + return UNOP_TANH; + default: + LOG(FATAL) << "unhandled opcode " << opcode; + } +} + +// Return the BinaryOperation proto enum value associated with the given HLO +// opcode. +BinaryOperation OpcodeToBinaryOperation(HloOpcode opcode) { + switch (opcode) { + case HloOpcode::kDot: + return BINOP_DOT; + case HloOpcode::kMultiply: + return BINOP_MUL; + case HloOpcode::kAdd: + return BINOP_ADD; + case HloOpcode::kSubtract: + return BINOP_SUB; + case HloOpcode::kIndex: + return BINOP_INDEX; + case HloOpcode::kDivide: + return BINOP_DIV; + case HloOpcode::kEq: + return BINOP_EQ; + case HloOpcode::kGe: + return BINOP_GE; + case HloOpcode::kGt: + return BINOP_GT; + case HloOpcode::kLe: + return BINOP_LE; + case HloOpcode::kLt: + return BINOP_LT; + case HloOpcode::kNe: + return BINOP_NE; + case HloOpcode::kMaximum: + return BINOP_MAX; + case HloOpcode::kMinimum: + return BINOP_MIN; + case HloOpcode::kPower: + return BINOP_POW; + case HloOpcode::kRemainder: + return BINOP_REM; + case HloOpcode::kLogicalOr: + return BINOP_LOGICAL_OR; + case HloOpcode::kLogicalAnd: + return BINOP_LOGICAL_AND; + default: + LOG(FATAL) << "unhandled opcode " << opcode; + } +} + +// Return the TernaryOperation proto enum value associated with the given HLO +// opcode. +TernaryOperation OpcodeToTernaryOperation(HloOpcode opcode) { + switch (opcode) { + case HloOpcode::kClamp: + return TRIOP_CLAMP; + case HloOpcode::kSelect: + return TRIOP_SELECT; + case HloOpcode::kUpdate: + return TRIOP_UPDATE; + default: + LOG(FATAL) << "unhandled opcode " << opcode; + } +} + +// Return the VariadicOperation proto enum value associated with the given HLO +// opcode. +VariadicOperation OpcodeToVariadicOperation(HloOpcode opcode) { + switch (opcode) { + case HloOpcode::kTuple: + return VAROP_TUPLE; + default: + LOG(FATAL) << "unhandled opcode " << opcode; + } +} + // Returns true if no element is present in slice more than once. bool AllUnique(tensorflow::gtl::ArraySlice slice) { return std::set(slice.begin(), slice.end()).size() == slice.size(); @@ -176,11 +282,21 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, } // namespace +/* static */ StatusOr ShapeInference::InferUnaryOpShape( + HloOpcode opcode, const HloInstruction* operand) { + // There is no copy operation at the proto level, so handle copy explicitly. + if (opcode == HloOpcode::kCopy) { + return operand->shape(); + } + + return InferUnaryOpShape(OpcodeToUnaryOperation(opcode), operand->shape()); +} + /* static */ StatusOr ShapeInference::InferUnaryOpShape( UnaryOperation operation, const Shape& arg) { TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(arg, "operand of unary operation")); - TF_DCHECK_OK(ShapeUtil::ValidateShape(arg)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(arg)); switch (operation) { case UNOP_FLOOR: case UNOP_CEIL: @@ -410,7 +526,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, } Shape result = ShapeUtil::MakeShape(lhs.element_type(), dimensions); - TF_DCHECK_OK(ShapeUtil::ValidateShape(result)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(result)); VLOG(2) << "inferred dot shape: " << ShapeUtil::HumanString(result); return result; } @@ -592,6 +708,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } } +/* static */ StatusOr ShapeInference::InferBinaryOpShape( + HloOpcode opcode, const HloInstruction* lhs, const HloInstruction* rhs) { + return InferBinaryOpShape(OpcodeToBinaryOperation(opcode), lhs->shape(), + rhs->shape(), /*broadcast_dimensions=*/{}); +} + /* static */ StatusOr ShapeInference::InferBinaryOpShape( BinaryOperation operation, const Shape& lhs, const Shape& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { @@ -600,8 +722,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( BinaryOperation_Name(operation).c_str(), ShapeUtil::HumanString(lhs).c_str(), ShapeUtil::HumanString(rhs).c_str(), tensorflow::str_util::Join(broadcast_dimensions, ", ").c_str()); - TF_DCHECK_OK(ShapeUtil::ValidateShape(lhs)); - TF_DCHECK_OK(ShapeUtil::ValidateShape(rhs)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(lhs)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(rhs)); TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(lhs, "lhs of binary operation")); TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(rhs, "rhs of binary operation")); @@ -660,12 +782,19 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } } +/* static */ StatusOr ShapeInference::InferTernaryOpShape( + HloOpcode opcode, const HloInstruction* lhs, const HloInstruction* rhs, + const HloInstruction* ehs) { + return InferTernaryOpShape(OpcodeToTernaryOperation(opcode), lhs->shape(), + rhs->shape(), ehs->shape()); +} + /* static */ StatusOr ShapeInference::InferTernaryOpShape( TernaryOperation operation, const Shape& lhs, const Shape& rhs, const Shape& ehs) { - TF_DCHECK_OK(ShapeUtil::ValidateShape(lhs)); - TF_DCHECK_OK(ShapeUtil::ValidateShape(rhs)); - TF_DCHECK_OK(ShapeUtil::ValidateShape(ehs)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(lhs)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(rhs)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(ehs)); switch (operation) { case TRIOP_CLAMP: return InferClampShape(lhs, rhs, ehs); @@ -686,9 +815,21 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } /* static */ StatusOr ShapeInference::InferVariadicOpShape( - VariadicOperation operation, std::vector operand_shapes) { + HloOpcode opcode, + tensorflow::gtl::ArraySlice operands) { + std::vector operand_shapes; + for (const HloInstruction* operand : operands) { + operand_shapes.push_back(&operand->shape()); + } + return InferVariadicOpShape(OpcodeToVariadicOperation(opcode), + operand_shapes); +} + +/* static */ StatusOr ShapeInference::InferVariadicOpShape( + VariadicOperation operation, + tensorflow::gtl::ArraySlice operand_shapes) { for (const Shape* shape : operand_shapes) { - TF_DCHECK_OK(ShapeUtil::ValidateShape(*shape)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(*shape)); } switch (operation) { case VAROP_TUPLE: { @@ -792,11 +933,11 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( scale_shape, "scale input of batch norm training")); - TF_RET_CHECK(ShapeUtil::ValidateShape(operand_shape) == + TF_RET_CHECK(ShapeUtil::ValidateShapeWithOptionalLayout(operand_shape) == tensorflow::Status::OK()); - TF_RET_CHECK(ShapeUtil::ValidateShape(offset_shape) == + TF_RET_CHECK(ShapeUtil::ValidateShapeWithOptionalLayout(offset_shape) == tensorflow::Status::OK()); - TF_RET_CHECK(ShapeUtil::ValidateShape(scale_shape) == + TF_RET_CHECK(ShapeUtil::ValidateShapeWithOptionalLayout(scale_shape) == tensorflow::Status::OK()); if (feature_index >= ShapeUtil::Rank(operand_shape)) { @@ -896,15 +1037,15 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( scale_shape, "scale input of batch norm inference")); - TF_RET_CHECK(ShapeUtil::ValidateShape(operand_shape) == + TF_RET_CHECK(ShapeUtil::ValidateShapeWithOptionalLayout(operand_shape) == tensorflow::Status::OK()); - TF_RET_CHECK(ShapeUtil::ValidateShape(offset_shape) == + TF_RET_CHECK(ShapeUtil::ValidateShapeWithOptionalLayout(offset_shape) == tensorflow::Status::OK()); - TF_RET_CHECK(ShapeUtil::ValidateShape(scale_shape) == + TF_RET_CHECK(ShapeUtil::ValidateShapeWithOptionalLayout(scale_shape) == tensorflow::Status::OK()); - TF_RET_CHECK(ShapeUtil::ValidateShape(mean_shape) == + TF_RET_CHECK(ShapeUtil::ValidateShapeWithOptionalLayout(mean_shape) == tensorflow::Status::OK()); - TF_RET_CHECK(ShapeUtil::ValidateShape(variance_shape) == + TF_RET_CHECK(ShapeUtil::ValidateShapeWithOptionalLayout(variance_shape) == tensorflow::Status::OK()); if (feature_index >= ShapeUtil::Rank(operand_shape)) { @@ -1044,11 +1185,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( output_grad_shape, "output_grad input of batch norm grad")); - TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(operand_shape)); - TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(mean_shape)); - TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(scale_shape)); - TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(var_shape)); - TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(output_grad_shape)); + TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(operand_shape)); + TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(mean_shape)); + TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(scale_shape)); + TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(var_shape)); + TF_RETURN_IF_ERROR( + ShapeUtil::ValidateShapeWithOptionalLayout(output_grad_shape)); if (feature_index >= ShapeUtil::Rank(operand_shape)) { return InvalidArgument( @@ -1230,8 +1372,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "lhs: %s", num_dims, ShapeUtil::HumanString(lhs).c_str()); } - TF_DCHECK_OK(ShapeUtil::ValidateShape(lhs)); - TF_DCHECK_OK(ShapeUtil::ValidateShape(rhs)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(lhs)); + TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(rhs)); // Verifies that the input and window dimensions are a permutation of // the dimension numbers. diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index 5d55df92a9..96e3b46c7d 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -21,6 +21,8 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -31,32 +33,48 @@ limitations under the License. namespace xla { // For a given operation and input shapes, infers what the resulting shape is -// for the operation. With this functionality, the user does not need to -// specify the expected result type for computations that are built up via the -// API -- the shape that results from an operation is inferred. +// for the operation. With this functionality, the user does not need to specify +// the expected result type for computations that are built up via the API -- +// the shape that results from an operation is inferred. Some methods have +// overloads for inferring shape at the HLO level. +// TODO(b/166374537): Complete HLO level inference overloads and use to +// automatically infer shape in HloInstruction::Create* methods. class ShapeInference { public: // Infers the shape produced by applying the given unary operation to the // given input shape. static StatusOr InferUnaryOpShape(UnaryOperation operation, const Shape& arg); + static StatusOr InferUnaryOpShape(HloOpcode opcode, + const HloInstruction* operand); // Infers the shape produced by applying the given binary operation to the // given input shapes. static StatusOr InferBinaryOpShape( BinaryOperation operation, const Shape& lhs, const Shape& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions); + static StatusOr InferBinaryOpShape(HloOpcode opcode, + const HloInstruction* lhs, + const HloInstruction* rhs); // Infers the shape produced by applying the given ternary operation to the // given input shapes. static StatusOr InferTernaryOpShape(TernaryOperation operation, const Shape& lhs, const Shape& rhs, const Shape& ehs); + static StatusOr InferTernaryOpShape(HloOpcode opcode, + const HloInstruction* lhs, + const HloInstruction* rhs, + const HloInstruction* ehs); // Infers the shape produced by applying the given variadic operation to the // given input operand shapes. static StatusOr InferVariadicOpShape( - VariadicOperation operation, std::vector operand_shapes); + VariadicOperation operation, + tensorflow::gtl::ArraySlice operand_shapes); + static StatusOr InferVariadicOpShape( + HloOpcode opcode, + tensorflow::gtl::ArraySlice operands); // Infers the shape produced by applying the given mapping computation shape // to the given operand shapes. diff --git a/tensorflow/compiler/xla/tests/fusion_test.cc b/tensorflow/compiler/xla/tests/fusion_test.cc index 4356307660..2be409561a 100644 --- a/tensorflow/compiler/xla/tests/fusion_test.cc +++ b/tensorflow/compiler/xla/tests/fusion_test.cc @@ -153,8 +153,8 @@ float FusionTest::ComputeElementwiseAnswer(HloOpcode opcode, } template <> -uint8 FusionTest::ComputeElementwiseAnswer(HloOpcode opcode, - ArraySlice xs) { +bool FusionTest::ComputeElementwiseAnswer(HloOpcode opcode, + ArraySlice xs) { switch (opcode) { case HloOpcode::kEq: return xs[0] == xs[1]; @@ -569,12 +569,12 @@ XLA_TEST_F(FusionTest, DISABLED_ON_CPU(ReduceImplicitBroadcast)) { ShapeUtil::MakeShape(S32, {}), const0, const1, {0}, hlo_module->AddEmbeddedComputation(MakeReduceTestComputation()))); auto negate3 = builder.AddInstruction(HloInstruction::CreateUnary( - ShapeUtil::MakeShape(S32, {1}), HloOpcode::kNegate, reduce2)); + ShapeUtil::MakeShape(S32, {}), HloOpcode::kNegate, reduce2)); hlo_module->AddEntryComputation(builder.Build()) ->CreateFusionInstruction(/*instructions_to_fuse=*/{negate3, reduce2}, HloInstruction::FusionKind::kLoop); - LiteralTestUtil::ExpectEqual(*Literal::CreateR1({-15}), + LiteralTestUtil::ExpectEqual(*Literal::CreateR0(-15), *ExecuteAndTransfer(std::move(hlo_module), {})); } @@ -690,26 +690,24 @@ XLA_TEST_F(FusionTest, Maximum2D) { TestElementwise2D(HloOpcode::kMaximum); } -XLA_TEST_F(FusionTest, Equal2D) { TestElementwise2D(HloOpcode::kEq); } +XLA_TEST_F(FusionTest, Equal2D) { TestElementwise2D(HloOpcode::kEq); } XLA_TEST_F(FusionTest, Inequal2D) { - TestElementwise2D(HloOpcode::kNe); + TestElementwise2D(HloOpcode::kNe); } XLA_TEST_F(FusionTest, Greater2D) { - TestElementwise2D(HloOpcode::kGt); + TestElementwise2D(HloOpcode::kGt); } -XLA_TEST_F(FusionTest, Lesser2D) { - TestElementwise2D(HloOpcode::kLt); -} +XLA_TEST_F(FusionTest, Lesser2D) { TestElementwise2D(HloOpcode::kLt); } XLA_TEST_F(FusionTest, GreaterOrEqual2D) { - TestElementwise2D(HloOpcode::kGe); + TestElementwise2D(HloOpcode::kGe); } XLA_TEST_F(FusionTest, LesserOrEqual2D) { - TestElementwise2D(HloOpcode::kLe); + TestElementwise2D(HloOpcode::kLe); } XLA_TEST_F(FusionTest, Clamp2D) { diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc index bba5b5aa04..606d801c84 100644 --- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc +++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc @@ -134,7 +134,7 @@ class MultiOutputFusionTest : public HloTestBase { builder.AddInstruction(HloInstruction::CreateReshape( ShapeUtil::MakeShape(F32, {size, 1}), add)); HloInstruction* dot = builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(F32, {}), HloOpcode::kDot, sub, reshape)); + ShapeUtil::MakeShape(F32, {1}), HloOpcode::kDot, sub, reshape)); auto computation = hlo_module->AddEntryComputation(builder.Build(dot)); if (manual_fusion) { @@ -160,7 +160,7 @@ class MultiOutputFusionTest : public HloTestBase { auto p0 = TransferToDevice(input0); auto p1 = TransferToDevice(input1); - Literal expect = *Literal::CreateR0(size * 1.5f * 3.5f); + Literal expect = *Literal::CreateR1({size * 1.5f * 3.5f}); auto actual = ExecuteAndTransfer(std::move(hlo_module), {p0, p1}); LiteralTestUtil::ExpectNear(expect, *actual, error_spec_); } diff --git a/tensorflow/examples/speech_commands/freeze.py b/tensorflow/examples/speech_commands/freeze.py index 9dbf58d1eb..cc2df9660a 100644 --- a/tensorflow/examples/speech_commands/freeze.py +++ b/tensorflow/examples/speech_commands/freeze.py @@ -45,8 +45,8 @@ import sys import tensorflow as tf from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio -import tensorflow.examples.speech_commands.input_data as input_data -import tensorflow.examples.speech_commands.models as models +import input_data +import models from tensorflow.python.framework import graph_util FLAGS = None diff --git a/tensorflow/examples/speech_commands/generate_streaming_test_wav.py b/tensorflow/examples/speech_commands/generate_streaming_test_wav.py index 185beaf3d8..ac7c11856e 100644 --- a/tensorflow/examples/speech_commands/generate_streaming_test_wav.py +++ b/tensorflow/examples/speech_commands/generate_streaming_test_wav.py @@ -46,8 +46,8 @@ import sys import numpy as np import tensorflow as tf -import tensorflow.examples.speech_commands.input_data as input_data -import tensorflow.examples.speech_commands.models as models +import input_data +import models FLAGS = None -- GitLab From 73cf0da3c5aab8e98cd66e91e5714df8196e5755 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 28 Aug 2017 10:56:02 -0700 Subject: [PATCH 019/294] TensorFlowInferenceInterface: Update some old comments. PiperOrigin-RevId: 166726527 --- .../android/TensorFlowInferenceInterface.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java index 9b7f394258..a75ec45e0b 100644 --- a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java +++ b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java @@ -16,8 +16,8 @@ limitations under the License. package org.tensorflow.contrib.android; import android.content.res.AssetManager; -import android.os.Trace; import android.os.Build.VERSION; +import android.os.Trace; import android.text.TextUtils; import android.util.Log; import java.io.FileInputStream; @@ -86,7 +86,7 @@ public class TensorFlowInferenceInterface { throw new RuntimeException("Failed to load model from '" + model + "'", e); } } - + /* * Load a TensorFlow model from provided InputStream. * Note: The InputStream will not be closed after loading model, users need to @@ -96,7 +96,7 @@ public class TensorFlowInferenceInterface { */ public TensorFlowInferenceInterface(InputStream is) { prepareNativeRuntime(); - + // modelName is redundant for model loading from input stream, here is for // avoiding error in initialization as modelName is marked final. this.modelName = ""; @@ -191,8 +191,9 @@ public class TensorFlowInferenceInterface { } /** - * Cleans up the state associated with this Object. initializeTensorFlow() can then be called - * again to initialize a new session. + * Cleans up the state associated with this Object. + * + *

The TenosrFlowInferenceInterface object is no longer usable after this method returns. */ public void close() { closeFeeds(); @@ -417,7 +418,7 @@ public class TensorFlowInferenceInterface { public void fetch(String outputName, ByteBuffer dst) { getTensor(outputName).writeTo(dst); } - + private void prepareNativeRuntime() { Log.i(TAG, "Checking to see if TensorFlow native methods are already loaded"); try { @@ -442,7 +443,7 @@ public class TensorFlowInferenceInterface { final long startMs = System.currentTimeMillis(); if (VERSION.SDK_INT >= 18) { - Trace.beginSection("initializeTensorFlow"); + Trace.beginSection("loadGraph"); Trace.beginSection("readGraphDef"); } @@ -470,7 +471,7 @@ public class TensorFlowInferenceInterface { if (VERSION.SDK_INT >= 18) { Trace.endSection(); // importGraphDef. - Trace.endSection(); // initializeTensorFlow. + Trace.endSection(); // loadGraph. } final long endMs = System.currentTimeMillis(); @@ -541,7 +542,7 @@ public class TensorFlowInferenceInterface { fetchNames.clear(); } - // State immutable between initializeTensorFlow calls. + // Immutable state. private final String modelName; private final Graph g; private final Session sess; -- GitLab From 78da113f02668bc000565773e1e1c8e180cb2fcf Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 28 Aug 2017 10:56:26 -0700 Subject: [PATCH 020/294] Don't record persistent memory usage of typed queues since the code crashes when running on GPU. PiperOrigin-RevId: 166726596 --- tensorflow/core/kernels/queue_op.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tensorflow/core/kernels/queue_op.h b/tensorflow/core/kernels/queue_op.h index 99d2d19bfd..005ea6e26d 100644 --- a/tensorflow/core/kernels/queue_op.h +++ b/tensorflow/core/kernels/queue_op.h @@ -56,13 +56,6 @@ class TypedQueueOp : public QueueOp { public: using QueueOp::QueueOp; - void Compute(OpKernelContext* context) override { - QueueOp::Compute(context); - if (queue_ && context->track_allocations()) { - context->record_host_persistent_memory_allocation(queue_->MemoryUsed()); - } - } - protected: template Status CreateTypedQueue(TypedQueue* queue, QueueInterface** ret) { -- GitLab From ffe641b13a926edc3e0625f7947134d24aef484a Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Mon, 28 Aug 2017 11:05:44 -0700 Subject: [PATCH 021/294] Do not keep unused, folded nodes in the graph to reduce the graph size. PiperOrigin-RevId: 166728144 --- .../grappler/optimizers/constant_folding.cc | 61 ++++- .../grappler/optimizers/constant_folding.h | 4 +- .../optimizers/constant_folding_test.cc | 242 ++++++++++++++---- tensorflow/core/grappler/utils.cc | 43 +++- tensorflow/core/grappler/utils.h | 11 +- .../python/grappler/memory_optimizer_test.py | 2 + .../python/grappler/tf_optimizer_test.py | 6 +- 7 files changed, 300 insertions(+), 69 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 016f78fcc5..a830af05e1 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -230,6 +230,7 @@ Status ConstantFolding::MaterializeShapes(const GrapplerItem& item, CHECK_LE(1, node.input_size()); string ctrl_dep = AddControlDependency(node.input(0)); node.set_input(0, ctrl_dep); + node_map_->AddOutput(NodeName(ctrl_dep), node.name()); } } } @@ -462,7 +463,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node, return Status::OK(); } -Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output) { +Status ConstantFolding::FoldNode(NodeDef* node) { if (IsMerge(*node)) { // Merge nodes are special, in the sense that they execute as soon as one of // their input is ready. We can therefore fold a merge node iff it has at @@ -511,14 +512,15 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output) { "already present in the graph")); } - NodeDef* const_out = output->add_node(); + NodeDef* const_out = added_graph_.add_node(); *const_out = *input_node; const_out->set_name(const_out_name); const_out->set_device(node->device()); *const_out->add_input() = AsControlDependency(*node); node_map_->AddNode(const_out->name(), const_out); + node_map_->AddOutput(node->name(), const_out->name()); - NodeDef* const_index = output->add_node(); + NodeDef* const_index = added_graph_.add_node(); const_index->set_op("Const"); Tensor index(DT_INT32, TensorShape({})); index.flat()(0) = input_index; @@ -529,6 +531,7 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output) { const_index->set_device(node->device()); *const_index->add_input() = AsControlDependency(*node); node_map_->AddNode(const_index->name(), const_index); + node_map_->AddOutput(node->name(), const_index->name()); auto outputs = node_map_->GetOutputs(node->name()); for (auto& output : outputs) { @@ -538,8 +541,10 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output) { if (node_name == node->name()) { if (position == 0) { *output->mutable_input(i) = const_out->name(); + node_map_->AddOutput(const_out->name(), output->name()); } else if (position == 1) { *output->mutable_input(i) = const_index->name(); + node_map_->AddOutput(const_index->name(), output->name()); } else { // This is a control dependency (or an invalid edge since the // merge node has only 2 inputs): preserve them. @@ -565,6 +570,7 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output) { continue; } + // Forward control dependencies. for (const auto& input : node->input()) { if (IsControlInput(input)) { *const_node->add_input() = input; @@ -582,8 +588,15 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output) { // create new nodes otherwise. if (const_nodes.size() == 1) { node->set_op("Const"); + // Note we need to clear the inputs in NodeMap before we clear the inputs + // in the node, otherwise NodeMap would see empty inputs and effectively + // does nothing. + node_map_->RemoveInputs(node->name()); node->clear_input(); *node->mutable_input() = const_node->input(); + for (const auto& input : node->input()) { + node_map_->AddOutput(NodeName(input), node->name()); + } *node->mutable_attr() = const_node->attr(); break; } else { @@ -592,10 +605,13 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output) { return errors::AlreadyExists(strings::StrCat( const_node->name(), "already present in the graph")); } - NodeDef* added_node = output->add_node(); + NodeDef* added_node = added_graph_.add_node(); *added_node = *const_node; added_node->set_device(node->device()); node_map_->AddNode(added_node->name(), added_node); + for (const auto& input : added_node->input()) { + node_map_->AddOutput(NodeName(input), added_node->name()); + } // All the constant nodes encoding output values have the same control // dependencies (since these are the control dependencies of the node // we're trying to fold). Record one such constant node. @@ -614,11 +630,15 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output) { // Propagate control dependencies if possible. If not, we'll just // preserve the existing control dependencies. if (constant_output != nullptr) { + node_map_->UpdateInput(node_name, NodeName(output->input(i)), + constant_output->name()); *output->mutable_input(i) = AsControlDependency(*constant_output); } } else if (position < const_nodes.size() && !const_nodes[position].name().empty()) { // Replace alive outputs with the corresponding constant. + node_map_->UpdateInput(output->name(), NodeName(output->input(i)), + const_nodes[position].name()); *output->mutable_input(i) = const_nodes[position].name(); } else { // Leave this edge alone. @@ -629,6 +649,12 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output) { } } } + outputs = node_map_->GetOutputs(node->name()); + if (outputs.empty() && has_fetch_ && + nodes_to_preserve_.find(node->name()) == nodes_to_preserve_.end()) { + node_map_->RemoveInputs(node->name()); + node->clear_input(); + } } return Status::OK(); } @@ -648,12 +674,13 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { if (processed_nodes.count(node->name())) { continue; } - Status s = FoldNode(node, output); + // We need to record a copy of output nodes before FoldNode() modifies it. + std::set outputs = node_map_->GetOutputs(node->name()); + Status s = FoldNode(node); processed_nodes.insert(node->name()); if (!s.ok()) { VLOG(1) << "Failed to fold node " << node->name() << ": " << s; } else { - auto outputs = node_map_->GetOutputs(node->name()); for (auto& output : outputs) { if (IsFoldable(*output)) { queue.push_back(output); @@ -662,11 +689,24 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { } } - // Build the graph after constant folding. Note that we keep all processed - // nodes in the graph in case users need to fetch their values. + // Build the graph after constant folding. + for (const auto& node : added_graph_.node()) { + auto outputs = node_map_->GetOutputs(node.name()); + if (!outputs.empty()) { + auto added_node = output->add_node(); + *added_node = node; + } + } for (const auto& node : graph_.node()) { - auto added_node = output->add_node(); - *added_node = node; + // If no fetch nodes is provided, we conservatively + // keep all nodes in the original graph in case users need to fetch + // their values. + auto outputs = node_map_->GetOutputs(node.name()); + if (!outputs.empty() || !has_fetch_ || + nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { + auto added_node = output->add_node(); + *added_node = node; + } } return Status::OK(); } @@ -814,6 +854,7 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, *output = GraphDef(); bool has_feed = !item.feed.empty(); + has_fetch_ = !item.fetch.empty(); GraphProperties properties(item); if (!has_feed) { diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 82ca565910..26a984205b 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -60,7 +60,7 @@ class ConstantFolding : public GraphOptimizer { Status EvaluateOneFoldable(const NodeDef& node, std::vector* outputs); - Status FoldNode(NodeDef* node, GraphDef* output); + Status FoldNode(NodeDef* node); Status FoldGraph(GraphDef* output); @@ -74,6 +74,8 @@ class ConstantFolding : public GraphOptimizer { GraphDef graph_; std::unique_ptr node_map_; std::set nodes_to_preserve_; + GraphDef added_graph_; + bool has_fetch_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index a1d35da5ac..2db843f2a4 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -62,30 +62,27 @@ TEST_F(ConstantFoldingTest, SimpleFolding) { Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(4, output.node_size()); - - const NodeDef& node_a = output.node(0); - EXPECT_EQ("a", node_a.name()); + EXPECT_EQ(3, output.node_size()); - const NodeDef& node_b = output.node(1); + const NodeDef& node_b = output.node(0); EXPECT_EQ("b", node_b.name()); - const NodeDef& node_c = output.node(2); + const NodeDef& node_c = output.node(1); EXPECT_EQ("c", node_c.name()); EXPECT_EQ("Const", node_c.op()); EXPECT_EQ("/CPU:0", node_c.device()); - const NodeDef& node_d = output.node(3); + const NodeDef& node_d = output.node(2); EXPECT_EQ("d", node_d.name()); EXPECT_EQ("c", node_d.input(1)); EXPECT_EQ("", node_d.device()); - std::vector fetch = {"a", "b", "c", "d"}; + std::vector fetch = {"b", "c", "d"}; auto tensors_expected = EvaluateNodes(item.graph, fetch); auto tensors = EvaluateNodes(output, fetch); - EXPECT_EQ(4, tensors_expected.size()); - EXPECT_EQ(4, tensors.size()); - for (int i = 0; i < 4; i++) { + EXPECT_EQ(fetch.size(), tensors_expected.size()); + EXPECT_EQ(fetch.size(), tensors.size()); + for (int i = 0; i < fetch.size(); i++) { test::ExpectTensorEqual(tensors_expected[i], tensors[i]); } } @@ -98,10 +95,12 @@ TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) { auto b = ops::Unique(s.WithOpName("b"), {a}); Output c = ops::Identity(s.WithOpName("c"), {b.y}); Output d = ops::Identity(s.WithOpName("d"), {b.idx}); + Output e = ops::Identity(s.WithOpName("e"), {c}); + Output f = ops::Identity(s.WithOpName("f"), {d}); GrapplerItem item; - item.fetch.push_back("c"); - item.fetch.push_back("d"); + item.fetch.push_back("e"); + item.fetch.push_back("f"); TF_CHECK_OK(s.ToGraphDef(&item.graph)); ConstantFolding fold; @@ -109,36 +108,30 @@ TEST_F(ConstantFoldingTest, FoldingNodeWithTwoOutputs) { Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(6, output.node_size()); - - const NodeDef& new_b_0 = output.node(0); - EXPECT_EQ("ConstantFolding/b-0", new_b_0.name()); - EXPECT_EQ("Const", new_b_0.op()); - - const NodeDef& new_b_1 = output.node(1); - EXPECT_EQ("ConstantFolding/b-1", new_b_1.name()); - EXPECT_EQ("Const", new_b_1.op()); - - const NodeDef& new_a = output.node(2); - EXPECT_EQ("a", new_a.name()); - - const NodeDef& new_b = output.node(3); - EXPECT_EQ("b", new_b.name()); + EXPECT_EQ(4, output.node_size()); - const NodeDef& new_c = output.node(4); + const NodeDef& new_c = output.node(0); EXPECT_EQ("c", new_c.name()); - EXPECT_EQ("ConstantFolding/b-0", new_c.input(0)); + EXPECT_EQ("Const", new_c.op()); - const NodeDef& new_d = output.node(5); + const NodeDef& new_d = output.node(1); EXPECT_EQ("d", new_d.name()); - EXPECT_EQ("ConstantFolding/b-1", new_d.input(0)); + EXPECT_EQ("Const", new_d.op()); + + const NodeDef& new_e = output.node(2); + EXPECT_EQ("e", new_e.name()); + EXPECT_EQ("c", new_e.input(0)); - std::vector fetch = {"a", "b", "c", "d"}; + const NodeDef& new_f = output.node(3); + EXPECT_EQ("f", new_f.name()); + EXPECT_EQ("d", new_f.input(0)); + + std::vector fetch = {"e", "f"}; auto tensors_expected = EvaluateNodes(item.graph, fetch); auto tensors = EvaluateNodes(output, fetch); - EXPECT_EQ(4, tensors_expected.size()); - EXPECT_EQ(4, tensors.size()); - for (int i = 0; i < 4; i++) { + EXPECT_EQ(fetch.size(), tensors_expected.size()); + EXPECT_EQ(fetch.size(), tensors.size()); + for (int i = 0; i < fetch.size(); i++) { test::ExpectTensorEqual(tensors_expected[i], tensors[i]); } } @@ -156,7 +149,50 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { Output i3 = ops::Identity(scope.WithOpName("e"), {i2}); GrapplerItem item; - item.fetch.push_back("i3"); + item.fetch.push_back("e"); + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + + ConstantFolding fold; + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::vector expected_nodes = {"dflt", "p1", "p2", "i2", "e"}; + EXPECT_EQ(output.node_size(), expected_nodes.size()); + int i = 0; + int found = 0; + for (const auto& node : output.node()) { + EXPECT_EQ(expected_nodes[i], output.node(i).name()); + i++; + if (node.name() == "i2") { + EXPECT_EQ("Const", node.op()); + ++found; + auto folded = EvaluateNodes(output, {"i2"}); + auto expected = EvaluateNodes(item.graph, {"i2"}); + EXPECT_EQ(1, expected.size()); + EXPECT_EQ(1, folded.size()); + test::ExpectTensorEqual(folded[0], expected[0]); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("^p1", node.input(0)); + EXPECT_EQ("^p2", node.input(1)); + } + } + EXPECT_EQ(1, found); +} + +TEST_F(ConstantFoldingTest, ControlDependenciesEmptyFetch) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + Output dflt = ops::Const(scope.WithOpName("dflt"), 3.14f, {1}); + Output p1 = ops::PlaceholderWithDefault(scope.WithOpName("p1"), dflt, {1}); + Output p2 = ops::PlaceholderWithDefault(scope.WithOpName("p2"), dflt, {1}); + Output c = + ops::Const(scope.WithOpName("c").WithControlDependencies(p1), 10, {3}); + Output i1 = ops::Identity(scope.WithOpName("i1"), {c}); + Output i2 = + ops::Identity(scope.WithOpName("i2").WithControlDependencies(p2), {i1}); + Output i3 = ops::Identity(scope.WithOpName("e"), {i2}); + + GrapplerItem item; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); ConstantFolding fold; @@ -164,8 +200,14 @@ TEST_F(ConstantFoldingTest, ControlDependencies) { Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + std::vector expected_nodes = {"dflt", "p1", "p2", "c", + "i1", "i2", "e"}; + EXPECT_EQ(output.node_size(), expected_nodes.size()); + int i = 0; int found = 0; for (const auto& node : output.node()) { + EXPECT_EQ(expected_nodes[i], output.node(i).name()); + i++; if (node.name() == "i1") { EXPECT_EQ("Const", node.op()); ++found; @@ -290,6 +332,41 @@ TEST_F(ConstantFoldingTest, ShapeMaterialization) { Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + int found = 0; + for (const auto& node : output.node()) { + if (node.name() == "shape") { + ++found; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^v2", node.input(0)); + Tensor value; + CHECK(value.FromProto(node.attr().at("value").tensor())); + EXPECT_EQ(5, value.flat()(0)); + EXPECT_EQ(7, value.flat()(1)); + } + } + EXPECT_EQ(1, found); +} + +TEST_F(ConstantFoldingTest, ShapeMaterializationEmptyFetch) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + Output v1 = ops::Variable(scope.WithOpName("v1"), {3}, DT_FLOAT); + Output v2 = ops::Variable(scope.WithOpName("v2"), {5, 7}, DT_FLOAT); + Output v3 = ops::Variable(scope.WithOpName("v3"), {11, 13}, DT_FLOAT); + Output rank = ops::Rank(scope.WithOpName("rank"), v1); + Output shape = ops::Shape(scope.WithOpName("shape"), v2); + Output size = ops::Size(scope.WithOpName("size"), v3); + Output p1 = ops::Multiply(scope.WithOpName("p1"), size, rank); + Output p2 = ops::Multiply(scope.WithOpName("p2"), p1, shape); + + GrapplerItem item; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + + ConstantFolding fold; + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + int found = 0; for (const auto& node : output.node()) { if (node.name() == "size") { @@ -322,7 +399,7 @@ TEST_F(ConstantFoldingTest, ShapeMaterialization) { EXPECT_EQ(3, found); } -TEST_F(ConstantFoldingTest, SwitchNodes) { +TEST_F(ConstantFoldingTest, SwitchNodesEmptyFetch) { tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); ops::Variable v_in(scope.WithOpName("v_in"), {3}, DT_FLOAT); ops::Variable v_ctrl(scope.WithOpName("v_ctrl"), {}, DT_BOOL); @@ -345,9 +422,6 @@ TEST_F(ConstantFoldingTest, SwitchNodes) { {statically_known.output, never_generated.output}); GrapplerItem item; - item.fetch.push_back("m"); - item.fetch.push_back("m2"); - TF_CHECK_OK(scope.ToGraphDef(&item.graph)); ConstantFolding fold; @@ -355,31 +429,111 @@ TEST_F(ConstantFoldingTest, SwitchNodes) { Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); + std::set present_nodes = {"v_in", "v_ctrl", + "switch", "i", + "p1", "p2", + "m", "false", + "constant", "switch2", + "i2", "i3", + "m2", "ConstantFoldingCtrl/switch_0", + "rank", "size"}; + std::set not_present_nodes = {"ConstantFolding/switch2-0"}; + EXPECT_EQ(present_nodes.size(), output.node_size()); + int found = 0; for (const auto& node : output.node()) { + EXPECT_TRUE(present_nodes.find(node.name()) != present_nodes.end()); + EXPECT_TRUE(not_present_nodes.find(node.name()) == not_present_nodes.end()); + present_nodes.erase(node.name()); + not_present_nodes.erase(node.name()); if (node.name() == "rank") { + ++found; EXPECT_EQ("Const", node.op()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("^ConstantFoldingCtrl/switch_0", node.input(0)); } if (node.name() == "size") { + ++found; EXPECT_EQ("Const", node.op()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("^i", node.input(0)); } - if (node.name() == "ConstantFolding/switch2-0") { + if (node.name() == "i2") { + ++found; EXPECT_EQ("Const", node.op()); EXPECT_EQ(0, node.input_size()); } - if (node.name() == "ConstantFolding/i2") { + if (node.name() == "i3") { + ++found; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("switch2:1", node.input(0)); + } + } + EXPECT_EQ(4, found); +} + +TEST_F(ConstantFoldingTest, SwitchNodes) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + ops::Variable v_in(scope.WithOpName("v_in"), {3}, DT_FLOAT); + ops::Variable v_ctrl(scope.WithOpName("v_ctrl"), {}, DT_BOOL); + ops::Switch s1(scope.WithOpName("switch"), v_in, v_ctrl); + ops::Rank rank(scope.WithOpName("rank"), s1.output_false); + ops::Identity i(scope.WithOpName("i"), s1.output_true); + ops::Size size(scope.WithOpName("size"), i); + ops::Square p1(scope.WithOpName("p1"), rank); + ops::Square p2(scope.WithOpName("p2"), size); + ops::Merge m(scope.WithOpName("m"), {p1.y, p2.y}); + + Output predicate = + ops::Const(scope.WithOpName("false"), false, TensorShape({})); + Output constant = + ops::Const(scope.WithOpName("constant"), 1.0f, TensorShape({1})); + ops::Switch s2(scope.WithOpName("switch2"), constant, predicate); + ops::Identity statically_known(scope.WithOpName("i2"), s2.output_false); + ops::Identity never_generated(scope.WithOpName("i3"), s2.output_true); + ops::Merge m2(scope.WithOpName("m2"), + {statically_known.output, never_generated.output}); + + GrapplerItem item; + item.fetch.push_back("m"); + item.fetch.push_back("m2"); + + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + + ConstantFolding fold; + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + std::set present_nodes = {"v_in", "v_ctrl", + "switch", "i", + "p1", "p2", + "m", "false", + "constant", "switch2", + "i2", "i3", + "m2", "ConstantFoldingCtrl/switch_0"}; + std::set not_present_nodes = {"rank", "size", + "ConstantFolding/switch2-0"}; + EXPECT_EQ(present_nodes.size(), output.node_size()); + + int found = 0; + for (const auto& node : output.node()) { + EXPECT_TRUE(present_nodes.find(node.name()) != present_nodes.end()); + EXPECT_TRUE(not_present_nodes.find(node.name()) == not_present_nodes.end()); + present_nodes.erase(node.name()); + not_present_nodes.erase(node.name()); + if (node.name() == "i2") { + ++found; EXPECT_EQ("Const", node.op()); EXPECT_EQ(0, node.input_size()); } if (node.name() == "i3") { + ++found; EXPECT_EQ("Identity", node.op()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("switch2:1", node.input(0)); } } + EXPECT_EQ(2, found); } TEST_F(ConstantFoldingTest, MergeNodes) { @@ -411,7 +565,7 @@ TEST_F(ConstantFoldingTest, MergeNodes) { ops::Identity idx3(scope.WithOpName("idx3"), m3.value_index); GrapplerItem item; - item.fetch.push_back("out1, idx1, out2, idx2, out3, idx3"); + item.fetch = {"out1", "idx1", "out2", "idx2", "out3", "idx3"}; TF_CHECK_OK(scope.ToGraphDef(&item.graph)); ConstantFolding fold; diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index add50d8b14..e2f722a8b1 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -52,18 +52,45 @@ const std::set& NodeMap::GetOutputs(const string& node_name) const { } void NodeMap::AddNode(const string& name, NodeDef* node) { - nodes_.insert(std::make_pair(name, node)); + auto ret = nodes_.insert(std::make_pair(name, node)); + CHECK(ret.second) << "Pair (" << name << "," << node + << ") is not inserted because a same key already exists."; } -void NodeMap::AddOutput(const string& node, const string& output) { - outputs_[node].insert(nodes_[output]); +void NodeMap::AddOutput(const string& node_name, const string& output_name) { + auto output_node = nodes_[output_name]; + CHECK(output_node) << "Output node " << output_name + << " is missing in NodeMap."; + outputs_[node_name].insert(output_node); } -void NodeMap::UpdateOutput(const string& node, const string& old_output, - const string& new_output) { - std::set& outputs = outputs_[node]; - outputs.erase(nodes_[old_output]); - outputs.insert(nodes_[new_output]); +void NodeMap::RemoveOutput(const string& node_name, const string& output_name) { + outputs_[node_name].erase(nodes_[output_name]); +} + +void NodeMap::UpdateInput(const string& node_name, const string& old_input_name, + const string& new_input_name) { + RemoveOutput(old_input_name, node_name); + AddOutput(new_input_name, node_name); +} + +void NodeMap::RemoveInputs(const string& node_name) { + auto node = nodes_[node_name]; + for (const auto& input : node->input()) { + RemoveOutput(NodeName(input), node->name()); + } +} + +void NodeMap::RemoveOutputs(const string& node_name) { + outputs_.erase(node_name); +} + +void NodeMap::UpdateOutput(const string& node_name, + const string& old_output_name, + const string& new_output_name) { + std::set& outputs = outputs_[node_name]; + outputs.erase(nodes_[old_output_name]); + outputs.insert(nodes_[new_output_name]); } bool IsSameInput(const string& name1, const string& name2) { diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index a49791bad8..4a8cb573d8 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -36,9 +36,14 @@ class NodeMap { // This method doesn't record the outputs of the added node; the outputs need // to be explicitly added by the AddOutput method. void AddNode(const string& name, NodeDef* node); - void AddOutput(const string& node, const string& output); - void UpdateOutput(const string& node, const string& old_output, - const string& new_output); + void UpdateInput(const string& node_name, const string& old_input_name, + const string& new_input_name); + void AddOutput(const string& node_name, const string& output_name); + void RemoveInputs(const string& node_name); + void RemoveOutput(const string& node_name, const string& output_name); + void RemoveOutputs(const string& node_name); + void UpdateOutput(const string& node_name, const string& old_output_name, + const string& new_output_name); private: GraphDef* graph_; diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py index 78f819e8c4..46f03952fa 100644 --- a/tensorflow/python/grappler/memory_optimizer_test.py +++ b/tensorflow/python/grappler/memory_optimizer_test.py @@ -50,6 +50,7 @@ class MemoryOptimizerSwapTest(test.TestCase): rewriter_config = rewriter_config_pb2.RewriterConfig( disable_model_pruning=True, + constant_folding=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL) graph = tf_optimizer.OptimizeGraph(rewriter_config, mg) @@ -72,6 +73,7 @@ class MemoryOptimizerSwapTest(test.TestCase): rewriter_config = rewriter_config_pb2.RewriterConfig( disable_model_pruning=True, + constant_folding=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL) graph = tf_optimizer.OptimizeGraph(rewriter_config, mg) diff --git a/tensorflow/python/grappler/tf_optimizer_test.py b/tensorflow/python/grappler/tf_optimizer_test.py index 522d1baf29..1a348ede9b 100644 --- a/tensorflow/python/grappler/tf_optimizer_test.py +++ b/tensorflow/python/grappler/tf_optimizer_test.py @@ -36,6 +36,7 @@ class PyWrapOptimizeGraphTest(test.TestCase): c = math_ops.add_n([a, b], name='c') d = math_ops.add_n([b, c], name='d') train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + # Being a train_op will make 'd' to be added as a fetch node. train_op.append(d) mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph()) @@ -44,9 +45,8 @@ class PyWrapOptimizeGraphTest(test.TestCase): graph = tf_optimizer.OptimizeGraph(rewriter_config, mg) - self.assertEqual(len(graph.node), 4) - self.assertItemsEqual([node.name - for node in graph.node], ['a', 'b', 'c', 'd']) + self.assertEqual(len(graph.node), 3) + self.assertItemsEqual([node.name for node in graph.node], ['b', 'c', 'd']) if __name__ == '__main__': -- GitLab From 298fdaf1da3288d982d90ea34e61beaa88c92411 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 28 Aug 2017 11:12:54 -0700 Subject: [PATCH 022/294] shape_tuple optimization in autograd PiperOrigin-RevId: 166729217 --- tensorflow/python/eager/tensor.py | 3 +++ tensorflow/python/eager/tensor_node.py | 12 ++++++++++-- tensorflow/python/framework/ops.py | 16 +++++++++++++++- tensorflow/python/framework/ops_test.py | 5 +++++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/tensor.py b/tensorflow/python/eager/tensor.py index 69269d1975..17e594d5c3 100644 --- a/tensorflow/python/eager/tensor.py +++ b/tensorflow/python/eager/tensor.py @@ -51,3 +51,6 @@ class LazyZero(object): def numpy(self): return np.zeros(self.shape, self.dtype) + + def _shape_tuple(self): + return self.shape diff --git a/tensorflow/python/eager/tensor_node.py b/tensorflow/python/eager/tensor_node.py index 8200761d03..ae5afaa970 100644 --- a/tensorflow/python/eager/tensor_node.py +++ b/tensorflow/python/eager/tensor_node.py @@ -250,10 +250,18 @@ class TensorVSpace(ag_core.VSpace): if isinstance(value, ops.IndexedSlices): self.shape = tensor_shape.TensorShape(value.dense_shape.numpy()) self.dtype = value.values.dtype + self.size = self.shape.num_elements() else: - self.shape = value.shape + self.shape = value._shape_tuple() # pylint: disable=protected-access + if self.shape is None or None in self.shape: + # TODO(apassos) we currently don't check the size so this is fine, but + # presumably there should be a better way of doing this. + self.size = 1 + else: + self.size = 1 + for s in self.shape: + self.size *= s self.dtype = value.dtype - self.size = self.shape.num_elements() # TODO(apassos) put gradients on the same device as ops. def __eq__(self, other): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 72d9bd115e..4284a51a57 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -372,6 +372,12 @@ class Tensor(_TensorLike): else: return None + def _shape_tuple(self): + shape = self._shape_as_list() + if shape is None: + return None + return tuple(shape) + def get_shape(self): """Alias of Tensor.shape.""" return self.shape @@ -1011,12 +1017,20 @@ def internal_convert_to_tensor(value, RuntimeError: If a registered conversion function returns an invalid value. """ + # Fast path for removing unnecessary convert-to-tensors. + if (isinstance(ag_core.getval(value), Tensor) + and (dtype is None or dtype == value.dtype) + and context.in_eager_mode()): + return value error_prefix = "" if name is None else "%s: " % name if dtype is not None: dtype = dtypes.as_dtype(dtype) for _, funcs_at_priority in sorted(_tensor_conversion_func_registry.items()): for base_type, conversion_func in funcs_at_priority: - if isinstance(value, base_type): + # Note we check the type of the object unwrapped from an autograd node, if + # tracing gradients, to ensure the same behavior happens with and without + # tracing. + if isinstance(ag_core.getval(value), base_type): # If dtype is None but preferred_dtype is not None, we try to # cast to preferred_dtype first. ret = None diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 61cc1ff31d..72964ca925 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -291,6 +291,11 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertAllEqual((4, 1), tensor.get_shape().as_list()) self.assertAllEqual(values, tensor.eval()) + def testShapeTuple(self): + with self.test_session(): + c = constant_op.constant(1) + self.assertEqual(c._shape_tuple(), ()) # pylint: disable=protected-access + def testConvertToTensorEager(self): with context.eager_mode(): t = ops.EagerTensor(1) -- GitLab From 86f5b7776e9e22efcd400ecff0d1d28f13c679e3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 28 Aug 2017 11:27:16 -0700 Subject: [PATCH 023/294] Fix ios_tensorflow_lib build failure on Mac. PiperOrigin-RevId: 166731436 --- tensorflow/tools/proto_text/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/proto_text/BUILD b/tensorflow/tools/proto_text/BUILD index 6607f629e7..3a60c8c958 100644 --- a/tensorflow/tools/proto_text/BUILD +++ b/tensorflow/tools/proto_text/BUILD @@ -34,7 +34,7 @@ cc_binary( visibility = ["//tensorflow:internal"], deps = [ ":gen_proto_text_functions_lib", - "//tensorflow/core:lib", + "//tensorflow/core:lib_proto_parsing", ], ) -- GitLab From 2a6c8897f59e2cbf943f52b222a1968fa7e2f158 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Mon, 28 Aug 2017 11:39:05 -0700 Subject: [PATCH 024/294] Updating the datasets map API documentation. PiperOrigin-RevId: 166733252 --- .../contrib/data/python/ops/dataset_ops.py | 31 ++++++++++++------- tensorflow/core/kernels/captured_function.cc | 13 +++----- tensorflow/core/kernels/captured_function.h | 4 +-- tensorflow/core/kernels/filter_dataset_op.cc | 3 +- .../core/kernels/flat_map_dataset_op.cc | 4 +-- .../kernels/group_by_window_dataset_op.cc | 6 ++-- .../core/kernels/interleave_dataset_op.cc | 4 +-- tensorflow/core/kernels/map_dataset_op.cc | 3 +- .../core/kernels/parallel_map_dataset_op.cc | 2 +- 9 files changed, 35 insertions(+), 35 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index cb6062ceb2..e692822aaf 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -1017,7 +1017,7 @@ class Dataset(object): d = d.shard(FLAGS.num_workers, FLAGS.worker_index) d = d.repeat(FLAGS.num_epochs) d = d.shuffle(FLAGS.shuffle_buffer_size) - d = d.map(parser_fn, num_threads=FLAGS.num_map_threads) + d = d.map(parser_fn, num_parallel_calls=FLAGS.num_map_threads) ``` Important caveats: @@ -1037,7 +1037,7 @@ class Dataset(object): d = d.repeat() d = d.interleave(tf.contrib.data.TFRecordDataset, cycle_length=FLAGS.num_readers, block_length=1) - d = d.map(parser_fn, num_threads=FLAGS.num_map_threads) + d = d.map(parser_fn, num_parallel_calls=FLAGS.num_map_threads) ``` Args: @@ -1205,28 +1205,35 @@ class Dataset(object): """ return GroupByWindowDataset(self, key_func, reduce_func, window_size) - def map(self, map_func, num_threads=None, output_buffer_size=None): + def map(self, + map_func, + num_threads=None, + output_buffer_size=None, + num_parallel_calls=None): """Maps `map_func` across this datset. Args: map_func: A function mapping a nested structure of tensors (having shapes and types defined by `self.output_shapes` and `self.output_types`) to another nested structure of tensors. - num_threads: (Optional.) A `tf.int32` scalar `tf.Tensor`, representing - the number of threads to use for processing elements in parallel. If - not specified, elements will be processed sequentially without - buffering. + num_threads: (Optional.) Deprecated, use `num_parallel_calls` instead. output_buffer_size: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the maximum number of processed elements that will be - buffered when processing in parallel. + buffered. + num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`, + representing the number elements to process in parallel. If not + specified, elements will be processed sequentially. Returns: A `Dataset`. """ - if num_threads is None: + if num_threads is None and num_parallel_calls is None: ret = MapDataset(self, map_func) else: - ret = ParallelMapDataset(self, map_func, num_threads) + if num_threads is None: + ret = ParallelMapDataset(self, map_func, num_parallel_calls) + else: + ret = ParallelMapDataset(self, map_func, num_threads) if output_buffer_size is not None: ret = ret.prefetch(output_buffer_size) return ret @@ -1255,8 +1262,8 @@ class Dataset(object): # each file. filenames = ["/var/data/file1.txt", "/var/data/file2.txt", ..."] dataset = (Dataset.from_tensor_slices(filenames) - .interleave( - lambda x: TextLineDataset(x).map(parse_fn, num_threads=1), + .interleave(lambda x: + TextLineDataset(x).map(parse_fn, num_parallel_calls=1), cycle_length=4, block_length=16)) ``` diff --git a/tensorflow/core/kernels/captured_function.cc b/tensorflow/core/kernels/captured_function.cc index 8e6e2db07e..87751b3cd5 100644 --- a/tensorflow/core/kernels/captured_function.cc +++ b/tensorflow/core/kernels/captured_function.cc @@ -105,8 +105,7 @@ Status CapturedFunction::Create( Status CapturedFunction::Run(FunctionLibraryRuntime::Options f_opts, gtl::ArraySlice args, - std::vector* rets, const string& prefix) { - port::Tracing::TraceMe activity(prefix, "::Run"); + std::vector* rets) { Notification n; Status s; auto done_callback = [&n, &s](Status func_status) { @@ -128,17 +127,15 @@ Status CapturedFunction::Run(FunctionLibraryRuntime::Options f_opts, void CapturedFunction::RunAsync(FunctionLibraryRuntime::Options f_opts, gtl::ArraySlice args, - std::vector* rets, const string& prefix, + std::vector* rets, FunctionLibraryRuntime::DoneCallback done) { - auto activity = new port::Tracing::TraceMe(prefix, "::RunAsync"); auto c_mgr = new CancellationManager; f_opts.cancellation_manager = c_mgr; FunctionLibraryRuntime::DoneCallback wrapped_done = std::bind( - [activity, c_mgr](FunctionLibraryRuntime::DoneCallback done, - // Begin unbound arguments. - Status s) { + [c_mgr](FunctionLibraryRuntime::DoneCallback done, + // Begin unbound arguments. + Status s) { delete c_mgr; - delete activity; done(s); }, std::move(done), std::placeholders::_1); diff --git a/tensorflow/core/kernels/captured_function.h b/tensorflow/core/kernels/captured_function.h index b5a18fd90e..f0aca4d239 100644 --- a/tensorflow/core/kernels/captured_function.h +++ b/tensorflow/core/kernels/captured_function.h @@ -61,12 +61,10 @@ class CapturedFunction { std::unique_ptr* out_function); Status Run(FunctionLibraryRuntime::Options f_opts, - gtl::ArraySlice args, std::vector* rets, - const string& prefix); + gtl::ArraySlice args, std::vector* rets); void RunAsync(FunctionLibraryRuntime::Options f_opts, gtl::ArraySlice args, std::vector* rets, - const string& prefix, FunctionLibraryRuntime::DoneCallback done); const Device* device() const { return device_; } diff --git a/tensorflow/core/kernels/filter_dataset_op.cc b/tensorflow/core/kernels/filter_dataset_op.cc index 05d6f6cae1..1e1a3ad75d 100644 --- a/tensorflow/core/kernels/filter_dataset_op.cc +++ b/tensorflow/core/kernels/filter_dataset_op.cc @@ -120,8 +120,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { Notification n; Status ret; std::vector result; - ret = dataset()->captured_func_->Run(opts, *out_tensors, &result, - prefix()); + ret = dataset()->captured_func_->Run(opts, *out_tensors, &result); if (!ret.ok()) { return ret; diff --git a/tensorflow/core/kernels/flat_map_dataset_op.cc b/tensorflow/core/kernels/flat_map_dataset_op.cc index a60a7a5ff6..e2310fecc7 100644 --- a/tensorflow/core/kernels/flat_map_dataset_op.cc +++ b/tensorflow/core/kernels/flat_map_dataset_op.cc @@ -137,8 +137,8 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel { }); opts.step_container = &step_container; std::vector return_values; - TF_RETURN_IF_ERROR(dataset()->captured_func_->Run( - opts, args, &return_values, prefix())); + TF_RETURN_IF_ERROR( + dataset()->captured_func_->Run(opts, args, &return_values)); if (!(return_values.size() == 1 && return_values[0].dtype() == DT_RESOURCE && diff --git a/tensorflow/core/kernels/group_by_window_dataset_op.cc b/tensorflow/core/kernels/group_by_window_dataset_op.cc index 89e0722899..a53e9456ad 100644 --- a/tensorflow/core/kernels/group_by_window_dataset_op.cc +++ b/tensorflow/core/kernels/group_by_window_dataset_op.cc @@ -171,7 +171,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { // group. std::vector key_func_output; TF_RETURN_IF_ERROR(dataset()->captured_key_func_->Run( - opts, next_input_element, &key_func_output, prefix())); + opts, next_input_element, &key_func_output)); if (key_func_output.size() != 1 || key_func_output[0].dtype() != DT_INT64 || @@ -259,8 +259,8 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { {std::move(key_arg), std::move(group_dataset_arg)}); std::vector return_values; - TF_RETURN_IF_ERROR(dataset()->captured_reduce_func_->Run( - opts, args, &return_values, prefix())); + TF_RETURN_IF_ERROR( + dataset()->captured_reduce_func_->Run(opts, args, &return_values)); if (!(return_values.size() == 1 && return_values[0].dtype() == DT_RESOURCE && diff --git a/tensorflow/core/kernels/interleave_dataset_op.cc b/tensorflow/core/kernels/interleave_dataset_op.cc index 90907e49bd..dce4f88101 100644 --- a/tensorflow/core/kernels/interleave_dataset_op.cc +++ b/tensorflow/core/kernels/interleave_dataset_op.cc @@ -198,8 +198,8 @@ class InterleaveDatasetOp : public OpKernel { }); opts.step_container = &step_container; std::vector return_values; - TF_RETURN_IF_ERROR(dataset()->captured_func_->Run( - opts, input_element, &return_values, prefix())); + TF_RETURN_IF_ERROR(dataset()->captured_func_->Run(opts, input_element, + &return_values)); if (!(return_values.size() == 1 && return_values[0].dtype() == DT_RESOURCE && diff --git a/tensorflow/core/kernels/map_dataset_op.cc b/tensorflow/core/kernels/map_dataset_op.cc index 68bcb6a1b6..10f4c2b82c 100644 --- a/tensorflow/core/kernels/map_dataset_op.cc +++ b/tensorflow/core/kernels/map_dataset_op.cc @@ -122,8 +122,7 @@ class MapDatasetOp : public UnaryDatasetOpKernel { opts.runner = ctx->runner(); // TODO(mrry): Avoid blocking a threadpool thread. We will need to // stack-rip the iterators and use async kernels. - Status s = - dataset()->captured_func_->Run(opts, args, out_tensors, prefix()); + Status s = dataset()->captured_func_->Run(opts, args, out_tensors); if (errors::IsOutOfRange(s)) { // `f` may deliberately raise `errors::OutOfRange` to indicate // that we should terminate the iteration early. diff --git a/tensorflow/core/kernels/parallel_map_dataset_op.cc b/tensorflow/core/kernels/parallel_map_dataset_op.cc index 00093b6956..3f503581fb 100644 --- a/tensorflow/core/kernels/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/parallel_map_dataset_op.cc @@ -205,7 +205,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { opts.step_container = step_container; opts.runner = ctx->runner(); dataset()->captured_func_->RunAsync( - opts, input_element, &result->return_values, prefix(), + opts, input_element, &result->return_values, [result, step_container, result_index](Status ret_status) { delete step_container; result->status.Update(ret_status); -- GitLab From 95d240c5fbecec9fdbef55dc1154c4f454752633 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 28 Aug 2017 11:41:04 -0700 Subject: [PATCH 025/294] Fix Adam in Eager mode and test adam/momentum PiperOrigin-RevId: 166733547 --- tensorflow/python/framework/test_util.py | 8 +- tensorflow/python/ops/variable_scope.py | 2 +- tensorflow/python/training/adam.py | 8 +- tensorflow/python/training/adam_test.py | 79 +++++++----- tensorflow/python/training/momentum_test.py | 128 +++++++++++--------- tensorflow/python/training/optimizer.py | 2 + tensorflow/python/training/slot_creator.py | 4 +- 7 files changed, 138 insertions(+), 93 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index b494b988b3..aceebbc9cc 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -276,7 +276,8 @@ def enable_c_api(fn): def run_in_graph_and_eager_modes(__unused__=None, graph=None, config=None, - use_gpu=False, force_gpu=False): + use_gpu=False, force_gpu=False, + reset_test=False): """Runs the test in both graph and eager modes. Args: @@ -286,6 +287,7 @@ def run_in_graph_and_eager_modes(__unused__=None, graph=None, config=None, session. use_gpu: If True, attempt to run as many ops as possible on GPU. force_gpu: If True, pin all ops to `/device:GPU:0`. + reset_test: If True, tearDown and SetUp the test case again. Returns: Returns a decorator that will run the decorated test function @@ -302,6 +304,10 @@ def run_in_graph_and_eager_modes(__unused__=None, graph=None, config=None, with self.test_session(graph, config, use_gpu, force_gpu): f(self) + if reset_test: + self.tearDown() + self.setUp() + def run_eager_mode(): if force_gpu: gpu_name = gpu_device_name() diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 908b6b2111..b7913890e4 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -978,7 +978,7 @@ class VariableScope(object): def set_partitioner(self, partitioner): """Set partitioner for this scope.""" - if context.in_eager_mode(): + if partitioner and context.in_eager_mode(): raise NotImplementedError("Partitioned variables are not yet supported " "in Eager mode.") self._partitioner = partitioner diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py index 796402425a..cdc532a38e 100644 --- a/tensorflow/python/training/adam.py +++ b/tensorflow/python/training/adam.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops @@ -118,8 +119,11 @@ class AdamOptimizer(optimizer.Optimizer): # silently ignored). first_var = min(var_list, key=lambda x: x.name) - if (self._beta1_power is None or - self._beta1_power.graph is not first_var.graph): + create_new = self._beta1_power is None + if not create_new and context.in_graph_mode(): + create_new = (self._beta1_power.graph is not first_var.graph) + + if create_new: with ops.colocate_with(first_var): self._beta1_power = variable_scope.variable(self._beta1, name="beta1_power", diff --git a/tensorflow/python/training/adam_test.py b/tensorflow/python/training/adam_test.py index 62b171e234..defcf33714 100644 --- a/tensorflow/python/training/adam_test.py +++ b/tensorflow/python/training/adam_test.py @@ -21,9 +21,11 @@ from __future__ import print_function import numpy as np from tensorflow.python.client import session +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops @@ -149,49 +151,60 @@ class AdamOptimizerTest(test.TestCase): repeated_index_update_var.eval()) def doTestBasic(self, use_resource=False): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - # Initialize variables for numpy implementation. - m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + var0_np, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + var1_np, name="var1_%d" % i) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) - if use_resource: - var0 = resource_variable_ops.ResourceVariable(var0_np) - var1 = resource_variable_ops.ResourceVariable(var1_np) - else: - var0 = variables.Variable(var0_np) - var1 = variables.Variable(var1_np) - grads0 = constant_op.constant(grads0_np) - grads1 = constant_op.constant(grads1_np) - opt = adam.AdamOptimizer() - update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - variables.global_variables_initializer().run() + opt = adam.AdamOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], var0.eval()) - self.assertAllClose([3.0, 4.0], var1.eval()) + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - beta1_power, beta2_power = opt._get_beta_accumulators() + beta1_power, beta2_power = opt._get_beta_accumulators() - # Run 3 steps of Adam - for t in range(1, 4): - self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) - self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) - update.run() + # Run 3 steps of Adam + for t in range(1, 4): + if context.in_graph_mode(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) - var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) + self.assertAllCloseAccordingToType(0.9**(t + 1), + self.evaluate(beta1_power)) + self.assertAllCloseAccordingToType(0.999**(t + 1), + self.evaluate(beta2_power)) - # Validate updated params - self.assertAllCloseAccordingToType(var0_np, var0.eval()) - self.assertAllCloseAccordingToType(var1_np, var1.eval()) + var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) def testBasic(self): - self.doTestBasic(use_resource=False) + with self.test_session(): + self.doTestBasic(use_resource=False) + @test_util.run_in_graph_and_eager_modes(reset_test=True) def testResourceBasic(self): self.doTestBasic(use_resource=True) diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py index 9d6221b560..ba9f763831 100644 --- a/tensorflow/python/training/momentum_test.py +++ b/tensorflow/python/training/momentum_test.py @@ -21,9 +21,11 @@ from __future__ import print_function import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import math_ops @@ -43,66 +45,82 @@ class MomentumOptimizerTest(test.TestCase): return var, accum def doTestBasic(self, use_resource=False): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - if use_resource: - var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype) - var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype) - else: - var0 = variables.Variable([1.0, 2.0], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - mom_opt = momentum_lib.MomentumOptimizer( - learning_rate=2.0, momentum=0.9) - mom_update = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - variables.global_variables_initializer().run() - # Check we have slots - self.assertEqual(["momentum"], mom_opt.get_slot_names()) - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEquals(slot0.get_shape(), var0.get_shape()) - self.assertFalse(slot0 in variables.trainable_variables()) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEquals(slot1.get_shape(), var1.get_shape()) - self.assertFalse(slot1 in variables.trainable_variables()) + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + [1.0, 2.0], dtype=dtype, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + [3.0, 4.0], dtype=dtype, name="var1_%d" % i) + else: + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + mom_opt = momentum_lib.MomentumOptimizer( + learning_rate=2.0, momentum=0.9) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], var0.eval()) - self.assertAllClose([3.0, 4.0], var1.eval()) - # Step 1: the momentum accumulators where 0. So we should see a normal - # update: v -= grad * learning_rate - mom_update.run() - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), slot0.eval()) - self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), slot1.eval()) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), var0.eval()) - self.assertAllCloseAccordingToType( - np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), var1.eval()) - # Step 2: the momentum accumulators contain the previous update. - mom_update.run() - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), slot0.eval()) - self.assertAllCloseAccordingToType( - np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), slot1.eval()) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), var0.eval()) - self.assertAllCloseAccordingToType( - np.array([ - 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - ( - (0.9 * 0.01 + 0.01) * 2.0) - ]), var1.eval()) + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Check we have slots + self.assertEqual(["momentum"], mom_opt.get_slot_names()) + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEquals(slot0.get_shape(), var0.get_shape()) + self.assertFalse(slot0 in variables.trainable_variables()) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEquals(slot1.get_shape(), var1.get_shape()) + self.assertFalse(slot1 in variables.trainable_variables()) + + # Step 1: the momentum accumulators where 0. So we should see a normal + # update: v -= grad * learning_rate + if context.in_graph_mode(): + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), + self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), + self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), + self.evaluate(var1)) + # Step 2: the momentum accumulators contain the previous update. + if context.in_graph_mode(): + self.evaluate(mom_update) + else: + mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), + self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([ + 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), 3.98 - ( + (0.9 * 0.01 + 0.01) * 2.0) + ]), self.evaluate(var1)) def testBasic(self): - self.doTestBasic(use_resource=False) + with self.test_session(): + self.doTestBasic(use_resource=False) + @test_util.run_in_graph_and_eager_modes(reset_test=True) def testResourceBasic(self): self.doTestBasic(use_resource=True) diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index 250e22f91e..86ba8e2c8e 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -69,6 +69,8 @@ def _deduplicate_indexed_slices(values, indices): def _var_key(var): + if context.in_eager_mode(): + return var._shared_name # pylint: disable=protected-access return (var.op.graph, var.op.name) diff --git a/tensorflow/python/training/slot_creator.py b/tensorflow/python/training/slot_creator.py index 4371e92bd3..ea28b5ddfc 100644 --- a/tensorflow/python/training/slot_creator.py +++ b/tensorflow/python/training/slot_creator.py @@ -39,6 +39,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops @@ -139,7 +140,8 @@ def create_slot_with_initializer(primary, initializer, shape, dtype, name, # and the same name has been previously used, the scope name will add '_N' # as suffix for unique identifications. validate_shape = shape.is_fully_defined() - with variable_scope.variable_scope(None, primary.op.name + "/" + name): + prefix = primary.op.name if context.in_graph_mode() else primary._shared_name # pylint: disable=protected-access + with variable_scope.variable_scope(None, prefix + "/" + name): if colocate_with_primary: with ops.colocate_with(primary): return _create_slot_var(primary, initializer, "", validate_shape, shape, -- GitLab From 45c3602cf43104be7073b4041aa049b3bb4390fa Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 28 Aug 2017 12:02:32 -0700 Subject: [PATCH 026/294] Adding tf_export decorator to support exporting tensorflow ops and constants. PiperOrigin-RevId: 166736891 --- tensorflow/python/util/tf_export.py | 128 ++++++++++++++++++ tensorflow/python/util/tf_export_test.py | 157 +++++++++++++++++++++++ 2 files changed, 285 insertions(+) create mode 100644 tensorflow/python/util/tf_export.py create mode 100644 tensorflow/python/util/tf_export_test.py diff --git a/tensorflow/python/util/tf_export.py b/tensorflow/python/util/tf_export.py new file mode 100644 index 0000000000..47396997d3 --- /dev/null +++ b/tensorflow/python/util/tf_export.py @@ -0,0 +1,128 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utilities for exporting TensorFlow symbols to the API. + +Exporting a function or a class: + +To export a function or a class use tf_export decorator. For e.g.: +```python +@tf_export('foo', 'bar.foo') +def foo(...): + ... +``` + +If a function is assigned to a variable, you can export it by calling +tf_export explicitly. For e.g.: +```python +foo = get_foo(...) +tf_export('foo', 'bar.foo')(foo) +``` + + +Exporting a constant +```python +foo = 1 +tf_export("consts.foo").export_constant(__name__, foo) +``` +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +from tensorflow.python.util import tf_decorator + + +class SymbolAlreadyExposedError(Exception): + """Raised when adding API names to symbol that already has API names.""" + pass + + +class tf_export(object): # pylint: disable=invalid-name + """Provides ways to export symbols to the TensorFlow API.""" + + def __init__(self, *args, **kwargs): + """Export under the names *args (first one is considered canonical). + + Args: + *args: API names in dot delimited format. + **kwargs: Optional keyed arguments. Currently only supports 'overrides' + argument. overrides: List of symbols that this is overriding + (those overrided api exports will be removed). Note: passing overrides + has no effect on exporting a constant. + """ + self._names = args + self._overrides = kwargs.get('overrides', []) + + def __call__(self, func): + """Calls this decorator. + + Args: + func: decorated symbol (function or class). + + Returns: + The input function with _tf_api_names attribute set. + + Raises: + SymbolAlreadyExposedError: Raised when a symbol already has API names. + """ + # Undecorate overridden names + for f in self._overrides: + _, undecorated_f = tf_decorator.unwrap(f) + del undecorated_f._tf_api_names # pylint: disable=protected-access + + _, undecorated_func = tf_decorator.unwrap(func) + + # Check for an existing api. We check if attribute name is in + # __dict__ instead of using hasattr to verify that subclasses have + # their own _tf_api_names as opposed to just inheriting it. + if '_tf_api_names' in undecorated_func.__dict__: + # pylint: disable=protected-access + raise SymbolAlreadyExposedError( + 'Symbol %s is already exposed as %s.' % + (undecorated_func.__name__, undecorated_func._tf_api_names)) + # pylint: enable=protected-access + + # Complete the export by creating/overriding attribute + # pylint: disable=protected-access + undecorated_func._tf_api_names = self._names + # pylint: enable=protected-access + return func + + def export_constant(self, module_name, value): + """Store export information for constants/string literals. + + Export information is stored in the module where constants/string literals + are defined. + + e.g. + ```python + foo = 1 + bar = 2 + tf_export("consts.foo").export_constant(__name__, foo) + tf_export("consts.bar").export_constant(__name__, bar) + ``` + + Args: + module_name: (string) Name of the module to store constant at. + value: Value of the constant. + """ + module = sys.modules[module_name] + if not hasattr(module, '_tf_api_constants'): + module._tf_api_constants = [] # pylint: disable=protected-access + # pylint: disable=protected-access + module._tf_api_constants.append((self._names, value)) + diff --git a/tensorflow/python/util/tf_export_test.py b/tensorflow/python/util/tf_export_test.py new file mode 100644 index 0000000000..3b7636c34e --- /dev/null +++ b/tensorflow/python/util/tf_export_test.py @@ -0,0 +1,157 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""tf_export tests.""" + +# pylint: disable=unused-import +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +from tensorflow.python.platform import test +from tensorflow.python.util import tf_decorator +from tensorflow.python.util import tf_export + + +def _test_function(unused_arg=0): + pass + + +def _test_function2(unused_arg=0): + pass + + +class TestClassA(object): + pass + + +class TestClassB(TestClassA): + pass + + +class ValidateExportTest(test.TestCase): + """Tests for tf_export class.""" + + class MockModule(object): + + def __init__(self, name): + self.__name__ = name + + def setUp(self): + self._modules = [] + + def tearDown(self): + for name in self._modules: + del sys.modules[name] + self._modules = [] + for symbol in [_test_function, _test_function, TestClassA, TestClassB]: + if hasattr(symbol, '_tf_api_names'): + del symbol._tf_api_names + + def _CreateMockModule(self, name): + mock_module = self.MockModule(name) + sys.modules[name] = mock_module + self._modules.append(name) + return mock_module + + def testExportSingleFunction(self): + export_decorator = tf_export.tf_export('nameA', 'nameB') + decorated_function = export_decorator(_test_function) + self.assertEquals(decorated_function, _test_function) + self.assertEquals(('nameA', 'nameB'), decorated_function._tf_api_names) + + def testExportMultipleFunctions(self): + export_decorator1 = tf_export.tf_export('nameA', 'nameB') + export_decorator2 = tf_export.tf_export('nameC', 'nameD') + decorated_function1 = export_decorator1(_test_function) + decorated_function2 = export_decorator2(_test_function2) + self.assertEquals(decorated_function1, _test_function) + self.assertEquals(decorated_function2, _test_function2) + self.assertEquals(('nameA', 'nameB'), decorated_function1._tf_api_names) + self.assertEquals(('nameC', 'nameD'), decorated_function2._tf_api_names) + + def testExportClasses(self): + export_decorator_a = tf_export.tf_export('TestClassA1') + export_decorator_a(TestClassA) + self.assertEquals(('TestClassA1',), TestClassA._tf_api_names) + self.assertTrue('_tf_api_names' not in TestClassB.__dict__) + + export_decorator_b = tf_export.tf_export('TestClassB1') + export_decorator_b(TestClassB) + self.assertEquals(('TestClassA1',), TestClassA._tf_api_names) + self.assertEquals(('TestClassB1',), TestClassB._tf_api_names) + + def testExportSingleConstant(self): + module1 = self._CreateMockModule('module1') + + test_constant = 123 + export_decorator = tf_export.tf_export('NAME_A', 'NAME_B') + export_decorator.export_constant('module1', test_constant) + self.assertEquals([(('NAME_A', 'NAME_B'), 123)], + module1._tf_api_constants) + + def testExportMultipleConstants(self): + module1 = self._CreateMockModule('module1') + module2 = self._CreateMockModule('module2') + + test_constant1 = 123 + test_constant2 = 'abc' + test_constant3 = 0.5 + + export_decorator1 = tf_export.tf_export('NAME_A', 'NAME_B') + export_decorator2 = tf_export.tf_export('NAME_C', 'NAME_D') + export_decorator3 = tf_export.tf_export('NAME_E', 'NAME_F') + export_decorator1.export_constant('module1', test_constant1) + export_decorator2.export_constant('module2', test_constant2) + export_decorator3.export_constant('module2', test_constant3) + self.assertEquals([(('NAME_A', 'NAME_B'), 123)], + module1._tf_api_constants) + self.assertEquals([(('NAME_C', 'NAME_D'), 'abc'), + (('NAME_E', 'NAME_F'), 0.5)], + module2._tf_api_constants) + + def testRaisesExceptionIfAlreadyHasAPINames(self): + _test_function._tf_api_names = ['abc'] + export_decorator = tf_export.tf_export('nameA', 'nameB') + with self.assertRaises(tf_export.SymbolAlreadyExposedError): + export_decorator(_test_function) + + def testOverridesFunction(self): + _test_function2._tf_api_names = ['abc'] + + export_decorator = tf_export.tf_export( + 'nameA', 'nameB', overrides=[_test_function2]) + export_decorator(_test_function) + + # _test_function overrides _test_function2. So, _tf_api_names + # should be removed from _test_function2. + self.assertFalse(hasattr(_test_function2, '_tf_api_names')) + + def testMultipleDecorators(self): + def get_wrapper(func): + def wrapper(*unused_args, **unused_kwargs): + pass + return tf_decorator.make_decorator(func, wrapper) + decorated_function = get_wrapper(_test_function) + + export_decorator = tf_export.tf_export('nameA', 'nameB') + exported_function = export_decorator(decorated_function) + self.assertEquals(decorated_function, exported_function) + self.assertEquals(('nameA', 'nameB'), _test_function._tf_api_names) + + +if __name__ == '__main__': + test.main() -- GitLab From 789fb5f46dc2d46261f65a09b768cef9bc7f4ae9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 28 Aug 2017 12:52:15 -0700 Subject: [PATCH 027/294] Fixes device assignment when local task type is not "worker" PiperOrigin-RevId: 166743522 --- .../python/training/functions/gbdt_batch.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index b6a07eafd5..6f85874a33 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -467,6 +467,11 @@ class GradientBoostedDecisionTreeModel(object): Raises: ValueError: if inputs are not valid. """ + # Get the worker device from input dependencies. + input_deps = (self._dense_floats + self._sparse_float_indices + + self._sparse_int_indices) + worker_device = input_deps[0].device + # Get tensors relevant for training and form the loss. predictions = predictions_dict[PREDICTIONS] partition_ids = predictions_dict[PARTITION_IDS] @@ -478,7 +483,6 @@ class GradientBoostedDecisionTreeModel(object): colocate_gradients_with_ops=False, gate_gradients=0, aggregation_method=None)[0] - strategy = self._learner_config.multi_class_strategy num_classes = self._learner_config.num_classes @@ -541,7 +545,7 @@ class GradientBoostedDecisionTreeModel(object): fc_name_idx = 0 handlers = [] init_stamp_token = constant_op.constant(0, dtype=dtypes.int64) - with ops.device(self._get_replica_device_setter()): + with ops.device(self._get_replica_device_setter(worker_device)): # Create handlers for dense float columns for dense_float_column_idx in range(len(self._dense_floats)): fc_name = self._fc_names[fc_name_idx] @@ -666,10 +670,6 @@ class GradientBoostedDecisionTreeModel(object): # Update handler stats. handler_reads = {} - - input_deps = (self._dense_floats + self._sparse_float_indices + - self._sparse_int_indices) - worker_device = input_deps[0].device for handler in handlers: handler_reads[handler] = handler.scheduled_reads() @@ -841,7 +841,7 @@ class GradientBoostedDecisionTreeModel(object): return diag_hessian_list - def _get_replica_device_setter(self): + def _get_replica_device_setter(self, worker_device): """Creates a replica device setter.""" ps_tasks = self._num_ps_replicas ps_ops = [ @@ -854,6 +854,7 @@ class GradientBoostedDecisionTreeModel(object): ] ps_strategy = _OpRoundRobinStrategy(ps_ops, ps_tasks) return device_setter.replica_device_setter( + worker_device=worker_device, ps_tasks=ps_tasks, merge_devices=True, ps_ops=ps_ops, -- GitLab From e705629ea7bcdc257f567f55100e3f793f3d1372 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 28 Aug 2017 13:02:48 -0700 Subject: [PATCH 028/294] Fix device for ResourceVariables, and add tape.watch on reading them. Print status for errors in resource variable lookups. PiperOrigin-RevId: 166744927 --- .../core/kernels/resource_variable_ops.cc | 34 +++++++------- tensorflow/python/eager/backprop.py | 2 + tensorflow/python/framework/test_util.py | 6 +-- .../resource_variable_ops_test.py | 13 ++---- tensorflow/python/ops/gradients_impl.py | 5 +++ .../python/ops/resource_variable_ops.py | 44 +++++++++++++------ 6 files changed, 61 insertions(+), 43 deletions(-) diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index 2100b2d491..12b10d61da 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -48,12 +48,13 @@ class ReadVariableOp : public OpKernel { void Compute(OpKernelContext* ctx) override { Var* variable = nullptr; ResourceHandle handle = HandleFromInput(ctx, 0); - OP_REQUIRES( - ctx, LookupResource(ctx, handle, &variable).ok(), - errors::NotFound("Attempted to read a nonexistent variable. " - "This usually means that the variable was not " - "initialized. Container: ", - handle.container(), ", name: ", handle.name())); + const auto status = LookupResource(ctx, handle, &variable); + OP_REQUIRES(ctx, status.ok(), + errors::NotFound( + "Error while reading resource variable ", handle.name(), + " from Container: ", handle.container(), + ". This could mean that the variable was not initialized. ", + status.ToString())); core::ScopedUnref s(variable); // TODO(apassos): It's possible to do copy-on-write here instead of always @@ -279,11 +280,11 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS #if GOOGLE_CUDA -#define REGISTER_GPU_KERNELS(type) \ - REGISTER_KERNEL_BUILDER(Name("AssignVariableOp") \ - .Device(DEVICE_GPU) \ - .TypeConstraint("dtype") \ - .HostMemory("resource"), \ +#define REGISTER_GPU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER(Name("AssignVariableOp") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("dtype") \ + .HostMemory("resource"), \ AssignVariableOp); TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); @@ -450,11 +451,12 @@ class ResourceScatterUpdateOp : public OpKernel { // Check that we have enough index space const int64 N_big = indices.NumElements(); - OP_REQUIRES(c, N_big <= std::numeric_limits::max(), - errors::InvalidArgument( - "indices has too many elements for ", - DataTypeString(DataTypeToEnum::v()), " indexing: ", - N_big, " > ", std::numeric_limits::max())); + OP_REQUIRES( + c, N_big <= std::numeric_limits::max(), + errors::InvalidArgument("indices has too many elements for ", + DataTypeString(DataTypeToEnum::v()), + " indexing: ", N_big, " > ", + std::numeric_limits::max())); const Index N = static_cast(indices.NumElements()); OP_REQUIRES( c, params->dim_size(0) <= std::numeric_limits::max(), diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 3a70eaeaa5..82ffaccde7 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -176,6 +176,8 @@ def _aggregate_grads(gradients): """Aggregate gradients of the same tensor.""" grad_lists = dict() for t, g in gradients: + if g is None: + continue if id(t) not in grad_lists: grad_lists[id(t)] = [(t, g)] else: diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index aceebbc9cc..2ed69f4da7 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -322,12 +322,10 @@ def run_in_graph_and_eager_modes(__unused__=None, graph=None, config=None, with context.device("/device:CPU:0"): f(self) + eager_graph = graph or ops.Graph() with context.eager_mode(): - if graph is None: + with eager_graph.as_default(): run_eager_mode() - else: - with graph.as_default(): - run_eager_mode() return decorated return decorator diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index cfb47c9bf3..98fc568515 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -302,14 +302,9 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): x = resource_variable_ops.var_handle_op( dtype=v.dtype.base_dtype, shape=v.get_shape(), shared_name="var5") - if context.in_graph_mode(): - with self.assertRaisesOpError("Resource .*/var5/.* does not exist"): - x_read = resource_variable_ops.read_variable_op(x, v.dtype.base_dtype) - self.evaluate(x_read) - else: - with self.assertRaisesRegexp(errors.NotFoundError, - "Attempted to read a nonexistent variable."): - _ = resource_variable_ops.read_variable_op(x, v.dtype.base_dtype) + with self.assertRaisesOpError("Resource .*/var5/.* does not exist"): + x_read = resource_variable_ops.read_variable_op(x, v.dtype.base_dtype) + self.evaluate(x_read) @test_util.run_in_graph_and_eager_modes() def testSharedNameWithNamescope(self): @@ -373,7 +368,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.assertEqual(dtypes.int32, v.dtype) self.assertEqual("foo/var7:0", v.name) self.assertAllEqual([10, 20, 35], v.shape.as_list()) - self.assertAllEqual(init.device, v.device) + self.assertEqual(context.get_default_context().device_name, v.device) self.assertTrue(isinstance(v.handle, ops.EagerTensor)) self.assertEqual(constraint, v.constraint) self.assertAllEqual(init.numpy(), v.read_value().numpy()) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index e073dbc640..64987f93dd 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -27,6 +27,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.core.framework import attr_value_pb2 +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -414,8 +415,12 @@ def gradients(ys, LookupError: if one of the operations between `x` and `y` does not have a registered gradient function. ValueError: if the arguments are invalid. + RuntimeError: if called in Eager mode. """ + if context.in_eager_mode(): + raise RuntimeError("tf.gradients not supported in EAGER mode. Use " + "functions in tf.contrib.eager.backprop instead.") ys = _AsList(ys) xs = _AsList(xs) if grad_ys is None: diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index ed344c0b14..e43cf4bc04 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -23,6 +23,7 @@ from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import variable_pb2 from tensorflow.python.eager import context from tensorflow.python.eager import custom_gradient +from tensorflow.python.eager import tape from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -223,6 +224,7 @@ class ResourceVariable(variables.Variable): if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") + self._trainable = trainable if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] self._save_slice_info = None @@ -249,6 +251,8 @@ class ResourceVariable(variables.Variable): dtype=initial_value.dtype.base_dtype, shared_name=handle_name, name=name) + self._handle_device = (self._handle.device if in_graph_mode else + context.get_default_context().device_name) else: initial_value = initial_value() initial_value = ops.convert_to_tensor( @@ -259,6 +263,8 @@ class ResourceVariable(variables.Variable): shared_name=handle_name, name=name, container="") + self._handle_device = (self._handle.device if in_graph_mode else + context.get_default_context().device_name) # pylint: enable=protected-access # Or get the initial value from a Tensor or Python object. @@ -267,8 +273,7 @@ class ResourceVariable(variables.Variable): initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) # pylint: disable=protected-access - if (in_graph_mode and - initial_value is not None and + if (in_graph_mode and initial_value is not None and initial_value.op._get_control_flow_context() is not None): raise ValueError( "Initializer for variable %s is from inside a control-flow " @@ -282,6 +287,8 @@ class ResourceVariable(variables.Variable): shared_name=handle_name, name=name, container="") + self._handle_device = (self._handle.device if in_graph_mode else + context.get_default_context().device_name) self._initial_value = initial_value if in_graph_mode else None self._handle_name = handle_name + ":0" @@ -302,8 +309,8 @@ class ResourceVariable(variables.Variable): with ops.name_scope("Read"), ops.colocate_with(self._handle): # Manually assign reads to the handle's device to avoid log # messages. - with ops.device(self._handle.device): - value = read_variable_op(self._handle, dtype=self._dtype) + with ops.device(self._handle_device): + value = self._read_variable_op() self._graph_element = value if caching_device is not None: # Variables may be created in a tf.device() or ops.colocate_with() @@ -326,14 +333,14 @@ class ResourceVariable(variables.Variable): self._graph_element = None if caching_device: with ops.device(caching_device): - self._cached_value = read_variable_op(self._handle, - dtype=self._dtype) + self._cached_value = self._read_variable_op() else: self._cached_value = None ops.add_to_collections(collections, self) def _init_from_proto(self, variable_def, import_scope=None): """Initializes from `VariableDef` proto.""" + # Note that init_from_proto is currently not supported in Eager mode. assert context.in_graph_mode() assert isinstance(variable_def, variable_pb2.VariableDef) if not variable_def.is_resource: @@ -344,6 +351,7 @@ class ResourceVariable(variables.Variable): self._handle = g.as_graph_element( ops.prepend_name_scope( variable_def.variable_name, import_scope=import_scope)) + self._handle_device = self._handle.device self._handle_name = self._handle.name self._initializer_op = g.as_graph_element( ops.prepend_name_scope( @@ -372,7 +380,7 @@ class ResourceVariable(variables.Variable): @property def device(self): """The device this variable is on.""" - return self._handle.device + return self._handle_device @property def graph(self): @@ -409,8 +417,8 @@ class ResourceVariable(variables.Variable): if self._cached_value is not None: return self._cached_value with ops.colocate_with(None, ignore_existing=True): - with ops.device(self._handle.device): - return read_variable_op(self._handle, dtype=self._dtype) + with ops.device(self._handle_device): + return self._read_variable_op() def _as_graph_element(self): """Conversion function for Graph.as_graph_element().""" @@ -425,7 +433,7 @@ class ResourceVariable(variables.Variable): def initial_value(self): """Returns the Tensor used as the initial value for the variable.""" if context.in_eager_mode(): - raise RuntimeError("initial_value not supported in EAGER mode.""") + raise RuntimeError("initial_value not supported in EAGER mode.") return self._initial_value @property @@ -460,6 +468,11 @@ class ResourceVariable(variables.Variable): def _get_save_slice_info(self): return self._save_slice_info + def _read_variable_op(self): + if context.in_eager_mode() and self._trainable: + tape.watch(self._handle) + return read_variable_op(self._handle, dtype=self._dtype) + def read_value(self): """Constructs an op which reads the value of this variable. @@ -477,10 +490,10 @@ class ResourceVariable(variables.Variable): # separate notions of device and memory, so handle.device can be GPU while # handle.memory_space is always CPU. if context.in_graph_mode(): - with ops.device(self._handle.device): - value = read_variable_op(self._handle, dtype=self._dtype) + with ops.device(self._handle_device): + value = self._read_variable_op() else: - value = read_variable_op(self._handle, dtype=self._dtype) + value = self._read_variable_op() # Return an identity so it can get placed on whatever device the context # specifies instead of the device where the variable is. return array_ops.identity(value) @@ -653,8 +666,10 @@ def read_variable_op(handle, dtype): A `Tensor` of type `dtype`. """ result = gen_resource_variable_ops.read_variable_op(handle, dtype) + def grad(dresult): return dresult + return result, grad @@ -720,7 +735,8 @@ def resource_gather(resource, indices, dtype, validate_indices=True, name=None): def grad(dresult): return ops.IndexedSlices( - dresult, indices, + dresult, + indices, dense_shape=gen_resource_variable_ops.variable_shape(resource)) return result, grad -- GitLab From db1e43e3335e190db1a27e0e74f104d7bc5471dd Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 28 Aug 2017 13:34:27 -0700 Subject: [PATCH 029/294] Java: Support multi-dimensional STRING tensors. Fixes #8531 PiperOrigin-RevId: 166749470 --- .../android/TensorFlowInferenceInterface.java | 19 ++ .../src/main/java/org/tensorflow/Tensor.java | 63 +++-- tensorflow/java/src/main/native/tensor_jni.cc | 225 ++++++++++++++++-- tensorflow/java/src/main/native/tensor_jni.h | 13 +- .../test/java/org/tensorflow/TensorTest.java | 24 ++ 5 files changed, 306 insertions(+), 38 deletions(-) diff --git a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java index a75ec45e0b..6389ef1f5d 100644 --- a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java +++ b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java @@ -267,6 +267,25 @@ public class TensorFlowInferenceInterface { addFeed(inputName, Tensor.create(DataType.UINT8, dims, ByteBuffer.wrap(src))); } + /** + * Copy a byte sequence into the input Tensor with name {@link inputName} as a string-valued + * scalar tensor. In the TensorFlow type system, a "string" is an arbitrary sequence of + * bytes, not a Java {@code String} (which is a sequence of characters). + */ + public void feedString(String inputName, byte[] src) { + addFeed(inputName, Tensor.create(src)); + } + + /** + * Copy an array of byte sequences into the input Tensor with name {@link inputName} as a + * string-valued one-dimensional tensor (vector). In the TensorFlow type system, a "string" + * is an arbitrary sequence of bytes, not a Java {@code String} (which is a sequence of + * characters). + */ + public void feedString(String inputName, byte[][] src) { + addFeed(inputName, Tensor.create(src)); + } + // Methods for taking a native Tensor and filling it with src from Java native IO buffers. /** diff --git a/tensorflow/java/src/main/java/org/tensorflow/Tensor.java b/tensorflow/java/src/main/java/org/tensorflow/Tensor.java index f4f853f716..ffaa242a31 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Tensor.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Tensor.java @@ -71,6 +71,27 @@ public final class Tensor implements AutoCloseable { * Tensor x = Tensor.create(twoD); * } * + * {@link DataType#STRING} typed Tensors are multi-dimensionary arrays of arbitrary byte sequences + * and thus have {@code byte[]} and not {@code String}-valued elements. For example: + * + *

{@code
+   * // Valid: A DataType.STRING tensor.
+   * Tensor s = Tensor.create(new byte[]{1, 2, 3});
+   *
+   * // Java Strings will need to be encoded into a byte-sequence.
+   * String mystring = "foo";
+   * Tensor s = Tensor.create(mystring.getBytes("UTF-8"));
+   *
+   * // Valid: Matrix of DataType.STRING tensors.
+   * // Each element might have a different length.
+   * byte[][][] matrix = new byte[2][2][];
+   * matrix[0][0] = "this".getBytes("UTF-8");
+   * matrix[0][1] = "is".getBytes("UTF-8");
+   * matrix[1][0] = "a".getBytes("UTF-8");
+   * matrix[1][1] = "matrix".getBytes("UTF-8");
+   * Tensor m = Tensor.create(matrix);
+   * }
+ * * @throws IllegalArgumentException if {@code obj} is not compatible with the TensorFlow type * system, or if obj does not disambiguate between multiple DataTypes. In that case, consider * using {@link #create(DataType, long[], ByteBuffer)} instead. @@ -85,10 +106,7 @@ public final class Tensor implements AutoCloseable { t.nativeHandle = allocate(t.dtype.c(), t.shapeCopy, byteSize); setValue(t.nativeHandle, obj); } else if (t.shapeCopy.length != 0) { - throw new UnsupportedOperationException( - String.format( - "non-scalar DataType.STRING tensors are not supported yet (version %s). Please file a feature request at https://github.com/tensorflow/tensorflow/issues/new", - TensorFlow.version())); + t.nativeHandle = allocateNonScalarBytes(t.shapeCopy, (Object[]) obj); } else { t.nativeHandle = allocateScalarBytes((byte[]) obj); } @@ -172,7 +190,8 @@ public final class Tensor implements AutoCloseable { * *

Creates a Tensor with the provided shape of any type where the tensor's data has been * encoded into {@code data} as per the specification of the TensorFlow C API. + * href="https://www.tensorflow.org/code/tensorflow/c/c_api.h">C + * API. * * @param dataType the tensor datatype. * @param shape the tensor shape. @@ -328,9 +347,9 @@ public final class Tensor implements AutoCloseable { * Copies the contents of the tensor to {@code dst} and returns {@code dst}. * *

For non-scalar tensors, this method copies the contents of the underlying tensor to a Java - * array. For scalar tensors, use one of {@link #floatValue()}, {@link #doubleValue()}, {@link - * #intValue()}, {@link #longValue()} or {@link #booleanValue()} instead. The type and shape of - * {@code dst} must be compatible with the tensor. For example: + * array. For scalar tensors, use one of {@link #bytesValue()}, {@link #floatValue()}, {@link + * #doubleValue()}, {@link #intValue()}, {@link #longValue()} or {@link #booleanValue()} instead. + * The type and shape of {@code dst} must be compatible with the tensor. For example: * *

{@code
    * int matrix[2][2] = {{1,2},{3,4}};
@@ -512,6 +531,13 @@ public final class Tensor implements AutoCloseable {
     throw new IllegalArgumentException("DataType " + dataType + " is not supported yet");
   }
 
+  private static void throwExceptionIfNotByteOfByteArrays(Object array) {
+    if (!array.getClass().getName().equals("[[B")) {
+      throw new IllegalArgumentException(
+          "object cannot be converted to a Tensor as it includes an array with null elements");
+    }
+  }
+
   private static DataType dataTypeOf(Object o) {
     if (o.getClass().isArray()) {
       if (Array.getLength(o) == 0) {
@@ -519,6 +545,10 @@ public final class Tensor implements AutoCloseable {
       }
       // byte[] is a DataType.STRING scalar.
       Object e = Array.get(o, 0);
+      if (e == null) {
+        throwExceptionIfNotByteOfByteArrays(o);
+        return DataType.STRING;
+      }
       if (Byte.class.isInstance(e) || byte.class.isInstance(e)) {
         return DataType.STRING;
       }
@@ -541,9 +571,11 @@ public final class Tensor implements AutoCloseable {
 
   private static int numDimensions(Object o) {
     if (o.getClass().isArray()) {
-      // byte[] is a DataType.STRING scalar.
       Object e = Array.get(o, 0);
-      if (Byte.class.isInstance(e) || byte.class.isInstance(e)) {
+      if (e == null) {
+        throwExceptionIfNotByteOfByteArrays(o);
+        return 1;
+      } else if (Byte.class.isInstance(e) || byte.class.isInstance(e)) {
         return 0;
       }
       return 1 + numDimensions(e);
@@ -568,11 +600,12 @@ public final class Tensor implements AutoCloseable {
   }
 
   private void throwExceptionIfTypeIsIncompatible(Object o) {
-    if (numDimensions(o) != numDimensions()) {
+    final int rank = numDimensions();
+    final int oRank = numDimensions(o);
+    if (oRank != rank) {
       throw new IllegalArgumentException(
           String.format(
-              "cannot copy Tensor with %d dimensions into an object with %d",
-              numDimensions(), numDimensions(o)));
+              "cannot copy Tensor with %d dimensions into an object with %d", rank, oRank));
     }
     if (dataTypeOf(o) != dtype) {
       throw new IllegalArgumentException(
@@ -580,7 +613,7 @@ public final class Tensor implements AutoCloseable {
               "cannot copy Tensor with DataType %s into an object of type %s",
               dtype.toString(), o.getClass().getName()));
     }
-    long[] oShape = new long[numDimensions()];
+    long[] oShape = new long[rank];
     fillShape(o, 0, oShape);
     for (int i = 0; i < oShape.length; ++i) {
       if (oShape[i] != shape()[i]) {
@@ -596,6 +629,8 @@ public final class Tensor implements AutoCloseable {
 
   private static native long allocateScalarBytes(byte[] value);
 
+  private static native long allocateNonScalarBytes(long[] shape, Object[] value);
+
   private static native void delete(long handle);
 
   private static native ByteBuffer buffer(long handle);
diff --git a/tensorflow/java/src/main/native/tensor_jni.cc b/tensorflow/java/src/main/native/tensor_jni.cc
index dfdca357f7..7bfe6c896d 100644
--- a/tensorflow/java/src/main/native/tensor_jni.cc
+++ b/tensorflow/java/src/main/native/tensor_jni.cc
@@ -213,6 +213,108 @@ size_t readNDArray(JNIEnv* env, TF_DataType dtype, const char* src,
     return sz;
   }
 }
+
+jbyteArray TF_StringDecodeTojbyteArray(JNIEnv* env, const char* src,
+                                       size_t src_len, TF_Status* status) {
+  const char* dst = nullptr;
+  size_t dst_len = 0;
+  TF_StringDecode(src, src_len, &dst, &dst_len, status);
+  if (TF_GetCode(status) != TF_OK) {
+    return nullptr;
+  }
+  jbyteArray ret = env->NewByteArray(dst_len);
+  jbyte* cpy = env->GetByteArrayElements(ret, nullptr);
+  memcpy(cpy, dst, dst_len);
+  env->ReleaseByteArrayElements(ret, cpy, 0);
+  return ret;
+}
+
+class StringTensorWriter {
+ public:
+  StringTensorWriter(TF_Tensor* t, int num_elements)
+      : offset_(0),
+        poffsets_(static_cast(TF_TensorData(t))),
+        pdata_(poffsets_ + 8 * num_elements),
+        plimit_(poffsets_ + TF_TensorByteSize(t)) {}
+
+  void Add(const char* src, size_t len, TF_Status* status) {
+    if (TF_GetCode(status) != TF_OK) return;
+    if (plimit_ - poffsets_ < sizeof(offset_)) {
+      TF_SetStatus(status, TF_OUT_OF_RANGE,
+                   "TF_STRING tensor encoding ran out of space for offsets, "
+                   "this is likely a bug, please file an issue at "
+                   "https://github.com/tensorflow/tensorflow/issues/new");
+      return;
+    }
+    memcpy(poffsets_, &offset_, sizeof(offset_));
+    size_t written =
+        TF_StringEncode(src, len, pdata_, (plimit_ - pdata_), status);
+    offset_ += written;
+    poffsets_ += 8;
+    pdata_ += written;
+  }
+
+ private:
+  uint64_t offset_;
+  char* poffsets_;
+  char* pdata_;
+  const char* plimit_;
+};
+
+class StringTensorReader {
+ public:
+  StringTensorReader(const TF_Tensor* t, int num_elements)
+      : index_(0),
+        offsets_(static_cast(TF_TensorData(t))),
+        data_(offsets_ + 8 * num_elements),
+        limit_(offsets_ + TF_TensorByteSize(t)) {}
+
+  jbyteArray Next(JNIEnv* env, TF_Status* status) {
+    if (TF_GetCode(status) != TF_OK) return nullptr;
+    uint64_t offset = 0;
+    const char* poffset = offsets_ + sizeof(offset) * index_;
+    if (poffset >= limit_) {
+      TF_SetStatus(
+          status, TF_INTERNAL,
+          "Invalid TF_STRING tensor, offsets table seems to be too small");
+      return nullptr;
+    }
+    memcpy(&offset, poffset, sizeof(offset));
+    const char* pdata = data_ + offset;
+    if (pdata >= limit_) {
+      TF_SetStatus(status, TF_INTERNAL,
+                   "Invalid TF_STRING tensor, invalid entry in offset table");
+      return nullptr;
+    }
+    ++index_;
+    return TF_StringDecodeTojbyteArray(env, pdata, (limit_ - pdata), status);
+  }
+
+ private:
+  int index_;
+  const char* offsets_;
+  const char* data_;
+  const char* limit_;
+};
+
+void readNDStringArray(JNIEnv* env, StringTensorReader* reader, int dims_left,
+                       jobjectArray dst, TF_Status* status) {
+  jsize len = env->GetArrayLength(dst);
+  if (dims_left == 1) {
+    for (jsize i = 0; i < len; ++i) {
+      jbyteArray elem = reader->Next(env, status);
+      if (TF_GetCode(status) != TF_OK) return;
+      env->SetObjectArrayElement(dst, i, elem);
+    }
+    return;
+  }
+  for (jsize i = 0; i < len; ++i) {
+    jobjectArray arr =
+        static_cast(env->GetObjectArrayElement(dst, i));
+    readNDStringArray(env, reader, dims_left - 1, arr, status);
+    if (TF_GetCode(status) != TF_OK) return;
+  }
+}
 }  // namespace
 
 JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocate(JNIEnv* env,
@@ -264,18 +366,13 @@ JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocateScalarBytes(
   char* dst = static_cast(TF_TensorData(t));
   memset(dst, 0, 8);  // The offset table
 
-  // jbyte is a signed char, while the C standard doesn't require char and
-  // signed char to be the same. As a result, static_cast(src) will
-  // complain. Copy the string instead. sigh!
+  TF_Status* status = TF_NewStatus();
   jbyte* jsrc = env->GetByteArrayElements(value, nullptr);
-  std::unique_ptr src(new char[src_len]);
-  static_assert(sizeof(jbyte) == sizeof(char),
-                "Cannot convert Java byte to a C char");
-  memcpy(src.get(), jsrc, src_len);
+  // jsrc is an unsigned byte*, TF_StringEncode requires a char*.
+  // reinterpret_cast<> for this conversion should be safe.
+  TF_StringEncode(reinterpret_cast(jsrc), src_len, dst + 8,
+                  dst_len, status);
   env->ReleaseByteArrayElements(value, jsrc, JNI_ABORT);
-
-  TF_Status* status = TF_NewStatus();
-  TF_StringEncode(src.get(), src_len, dst + 8, dst_len, status);
   if (!throwExceptionIfNotOK(env, status)) {
     TF_DeleteStatus(status);
     return 0;
@@ -284,6 +381,85 @@ JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocateScalarBytes(
   return reinterpret_cast(t);
 }
 
+namespace {
+size_t nonScalarTF_STRINGTensorSize(JNIEnv* env, jarray value, int num_dims) {
+  if (num_dims == 0) {
+    // This is the last dimension, i.e., value should correspond to a jbyteArray
+    // encoding the string.
+    return TF_StringEncodedSize(
+        static_cast(env->GetArrayLength(value)));
+  }
+  jsize len = env->GetArrayLength(value);
+  size_t ret = 0;
+  for (jsize i = 0; i < len; ++i) {
+    jarray elem = static_cast(
+        env->GetObjectArrayElement(static_cast(value), i));
+    ret += nonScalarTF_STRINGTensorSize(env, elem, num_dims - 1);
+  }
+  return ret;
+}
+
+void fillNonScalarTF_STRINGTensorData(JNIEnv* env, jarray value, int num_dims,
+                                      StringTensorWriter* writer,
+                                      TF_Status* status) {
+  if (num_dims == 0) {
+    jbyte* jsrc =
+        env->GetByteArrayElements(static_cast(value), nullptr);
+    writer->Add(reinterpret_cast(jsrc), env->GetArrayLength(value),
+                status);
+    env->ReleaseByteArrayElements(static_cast(value), jsrc,
+                                  JNI_ABORT);
+    return;
+  }
+  jsize len = env->GetArrayLength(value);
+  for (jsize i = 0; i < len; ++i) {
+    jarray elem = static_cast(
+        env->GetObjectArrayElement(static_cast(value), i));
+    if (TF_GetCode(status) != TF_OK) return;
+    fillNonScalarTF_STRINGTensorData(env, elem, num_dims - 1, writer, status);
+  }
+}
+}  // namespace
+
+JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocateNonScalarBytes(
+    JNIEnv* env, jclass clazz, jlongArray shape, jobjectArray value) {
+  // TF_STRING tensors are encoded with a table of 8-byte offsets following by
+  // TF_StringEncode-encoded bytes.
+  const int num_dims = static_cast(env->GetArrayLength(shape));
+  int64_t* dims = new int64_t[num_dims];
+  int64_t num_elements = 1;
+  {
+    jlong* jdims = env->GetLongArrayElements(shape, nullptr);
+    for (int i = 0; i < num_dims; ++i) {
+      dims[i] = static_cast(jdims[i]);
+      num_elements *= dims[i];
+    }
+    env->ReleaseLongArrayElements(shape, jdims, JNI_ABORT);
+  }
+  const size_t encoded_size =
+      nonScalarTF_STRINGTensorSize(env, value, num_dims);
+  TF_Tensor* t = TF_AllocateTensor(TF_STRING, dims, num_dims,
+                                   8 * num_elements + encoded_size);
+  if (t == nullptr) {
+    delete[] dims;
+    throwException(env, kNullPointerException,
+                   "unable to allocate memory for the Tensor");
+    return 0;
+  }
+  TF_Status* status = TF_NewStatus();
+  StringTensorWriter writer(t, num_elements);
+  fillNonScalarTF_STRINGTensorData(env, value, num_dims, &writer, status);
+  delete[] dims;
+  jlong ret = 0;
+  if (!throwExceptionIfNotOK(env, status)) {
+    TF_DeleteTensor(t);
+  } else {
+    ret = reinterpret_cast(t);
+  }
+  TF_DeleteStatus(status);
+  return ret;
+}
+
 JNIEXPORT void JNICALL Java_org_tensorflow_Tensor_delete(JNIEnv* env,
                                                          jclass clazz,
                                                          jlong handle) {
@@ -292,8 +468,8 @@ JNIEXPORT void JNICALL Java_org_tensorflow_Tensor_delete(JNIEnv* env,
 }
 
 JNIEXPORT jobject JNICALL Java_org_tensorflow_Tensor_buffer(JNIEnv* env,
-                                                              jclass clazz,
-                                                              jlong handle) {
+                                                            jclass clazz,
+                                                            jlong handle) {
   TF_Tensor* t = requireHandle(env, handle);
   if (t == nullptr) return nullptr;
   void* data = TF_TensorData(t);
@@ -393,17 +569,9 @@ JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_Tensor_scalarBytes(
                    "invalid tensor encoding: bad offsets");
     return nullptr;
   }
-  jbyteArray ret = nullptr;
-  const char* dst = nullptr;
-  size_t dst_len = 0;
   TF_Status* status = TF_NewStatus();
-  TF_StringDecode(src, src_len, &dst, &dst_len, status);
-  if (throwExceptionIfNotOK(env, status)) {
-    ret = env->NewByteArray(dst_len);
-    jbyte* cpy = env->GetByteArrayElements(ret, nullptr);
-    memcpy(cpy, dst, dst_len);
-    env->ReleaseByteArrayElements(ret, cpy, 0);
-  }
+  jbyteArray ret = TF_StringDecodeTojbyteArray(env, src, src_len, status);
+  throwExceptionIfNotOK(env, status);
   TF_DeleteStatus(status);
   return ret;
 }
@@ -424,6 +592,19 @@ JNIEXPORT void JNICALL Java_org_tensorflow_Tensor_readNDArray(JNIEnv* env,
                    "accessor (floatValue(), intValue() etc.) instead");
     return;
   }
+  if (dtype == TF_STRING) {
+    int64_t num_elements = 1;
+    for (int i = 0; i < num_dims; ++i) {
+      num_elements *= TF_Dim(t, i);
+    }
+    StringTensorReader reader(t, num_elements);
+    TF_Status* status = TF_NewStatus();
+    readNDStringArray(env, &reader, num_dims, static_cast(value),
+                      status);
+    throwExceptionIfNotOK(env, status);
+    TF_DeleteStatus(status);
+    return;
+  }
   readNDArray(env, dtype, static_cast(data), sz, num_dims,
               static_cast(value));
 }
diff --git a/tensorflow/java/src/main/native/tensor_jni.h b/tensorflow/java/src/main/native/tensor_jni.h
index 70850d250b..a300936884 100644
--- a/tensorflow/java/src/main/native/tensor_jni.h
+++ b/tensorflow/java/src/main/native/tensor_jni.h
@@ -28,7 +28,8 @@ extern "C" {
  * Signature: (I[JJ)J
  */
 JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocate(JNIEnv *, jclass,
-                                                            jint, jlongArray, jlong);
+                                                            jint, jlongArray,
+                                                            jlong);
 
 /*
  * Class:     org_tensorflow_Tensor
@@ -38,6 +39,14 @@ JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocate(JNIEnv *, jclass,
 JNIEXPORT jlong JNICALL
 Java_org_tensorflow_Tensor_allocateScalarBytes(JNIEnv *, jclass, jbyteArray);
 
+/*
+ * Class:     org_tensorflow_Tensor
+ * Method:    allocateNonScalarBytes
+ * Signature: ([J[Ljava/lang/Object;)J
+ */
+JNIEXPORT jlong JNICALL Java_org_tensorflow_Tensor_allocateNonScalarBytes(
+    JNIEnv *, jclass, jlongArray, jobjectArray);
+
 /*
  * Class:     org_tensorflow_Tensor
  * Method:    delete
@@ -52,7 +61,7 @@ JNIEXPORT void JNICALL Java_org_tensorflow_Tensor_delete(JNIEnv *, jclass,
  * Signature: (J)Ljava/nio/ByteBuffer;
  */
 JNIEXPORT jobject JNICALL Java_org_tensorflow_Tensor_buffer(JNIEnv *, jclass,
-                                                              jlong);
+                                                            jlong);
 
 /*
  * Class:     org_tensorflow_Tensor
diff --git a/tensorflow/java/src/test/java/org/tensorflow/TensorTest.java b/tensorflow/java/src/test/java/org/tensorflow/TensorTest.java
index 3ff59e71b2..bb5f9a0708 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/TensorTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/TensorTest.java
@@ -386,6 +386,30 @@ public class TensorTest {
     }
   }
 
+  @Test
+  public void testNDimensionalStringTensor() {
+    byte[][][] matrix = new byte[4][3][];
+    for (int i = 0; i < 4; ++i) {
+      for (int j = 0; j < 3; ++j) {
+        matrix[i][j] = String.format("(%d, %d) = %d", i, j, i << j).getBytes(UTF_8);
+      }
+    }
+    try (Tensor t = Tensor.create(matrix)) {
+      assertEquals(DataType.STRING, t.dataType());
+      assertEquals(2, t.numDimensions());
+      assertArrayEquals(new long[] {4, 3}, t.shape());
+
+      byte[][][] got = t.copyTo(new byte[4][3][]);
+      assertEquals(4, got.length);
+      for (int i = 0; i < 4; ++i) {
+        assertEquals(String.format("%d", i), 3, got[i].length);
+        for (int j = 0; j < 3; ++j) {
+          assertArrayEquals(String.format("(%d, %d)", i, j), matrix[i][j], got[i][j]);
+        }
+      }
+    }
+  }
+
   @Test
   public void failCreateOnMismatchedDimensions() {
     int[][][] invalid = new int[3][1][];
-- 
GitLab


From f02033fe3e44c8cfd2c4e1b32fcd595c365be843 Mon Sep 17 00:00:00 2001
From: Derek Murray 
Date: Mon, 28 Aug 2017 13:37:26 -0700
Subject: [PATCH 030/294] Handle multiply-captured resources in
 CapturedFunction.

Previously, if the list of captured inputs to a `Dataset.map()` (or
other function-wrapping `Dataset`) contained the same resource more
than once, it would raise an unrecoverable `AlreadyExistsError`. Since
this can (apparently) happen, this change makes the resource-capturing
logic robust to this possibility.

PiperOrigin-RevId: 166749921
---
 .../kernel_tests/map_dataset_op_test.py       | 26 +++++++++++++++++++
 tensorflow/core/kernels/captured_function.cc  | 20 ++++++++++++--
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
index b3af84a953..97b4ec44fc 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
@@ -383,6 +383,32 @@ class MapDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def testCaptureSameResourceMultipleTimes(self):
+    elements = np.random.randint(100, size=[200])
+    queue = data_flow_ops.FIFOQueue(
+        200, dtypes.int64, shapes=[], shared_name="shared_queue")
+    queue_2 = data_flow_ops.FIFOQueue(
+        200, dtypes.int64, shapes=[], shared_name="shared_queue")
+
+    enqueue_op = queue.enqueue_many(elements)
+    close_op = queue.close()
+
+    iterator = (dataset_ops.Dataset.from_tensors(0).repeat(-1)
+                .map(lambda _: (queue.dequeue(), queue_2.dequeue()))
+                .make_initializable_iterator())
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(enqueue_op)
+      sess.run(close_op)
+      sess.run(init_op)
+      for i in range(100):
+        self.assertEqual(sorted([elements[i * 2], elements[i * 2 + 1]]),
+                         sorted(sess.run(get_next)))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(get_next)
+
   def testCaptureVariable(self):
     counter_var = variable_scope.get_variable(
         "counter", (), dtypes.int32, use_resource=True)
diff --git a/tensorflow/core/kernels/captured_function.cc b/tensorflow/core/kernels/captured_function.cc
index 87751b3cd5..b9de1e303b 100644
--- a/tensorflow/core/kernels/captured_function.cc
+++ b/tensorflow/core/kernels/captured_function.cc
@@ -59,8 +59,24 @@ Status CapturedFunction::Create(
     } else if (!s.ok()) {                                                      \
       return s;                                                                \
     }                                                                          \
-    TF_RETURN_IF_ERROR(device->resource_manager()->Create(                     \
-        input_handle.container(), input_handle.name(), resource));             \
+    ResourceType* already_created_resource;                                    \
+    /* Look up the resource in the this function's resource manager, in case   \
+     * it has already been created. */                                         \
+    s = device->resource_manager()->Lookup(input_handle.container(),           \
+                                           input_handle.name(),                \
+                                           &already_created_resource);         \
+    if (s.ok()) {                                                              \
+      CHECK_EQ(resource, already_created_resource);                            \
+      resource->Unref();                                                       \
+      already_created_resource->Unref();                                       \
+    } else {                                                                   \
+      if (errors::IsNotFound(s)) {                                             \
+        TF_RETURN_IF_ERROR(device->resource_manager()->Create(                 \
+            input_handle.container(), input_handle.name(), resource));         \
+      } else {                                                                 \
+        return s;                                                              \
+      }                                                                        \
+    }                                                                          \
     continue;                                                                  \
   }
 
-- 
GitLab


From e36c9d98f5d2a6ffea53f4d3f2e970d56d584924 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 14:09:27 -0700
Subject: [PATCH 031/294] Scrub names in dump_graph.cc for .?[] as well.

PiperOrigin-RevId: 166754872
---
 tensorflow/compiler/tf2xla/dump_graph.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/tf2xla/dump_graph.cc b/tensorflow/compiler/tf2xla/dump_graph.cc
index af5753c260..ddd912b873 100644
--- a/tensorflow/compiler/tf2xla/dump_graph.cc
+++ b/tensorflow/compiler/tf2xla/dump_graph.cc
@@ -38,7 +38,8 @@ string MakeUniquePath(string name) {
 
   // Remove illegal characters from `name`.
   for (int i = 0; i < name.size(); ++i) {
-    if (name[i] == '/') {
+    char ch = name[i];
+    if (ch == '/' || ch == '[' || ch == ']' || ch == '*' || ch == '?') {
       name[i] = '_';
     }
   }
-- 
GitLab


From 2546f7930e4488dd7fa6f482ed1fb389d2d32774 Mon Sep 17 00:00:00 2001
From: Vivek Rane 
Date: Sun, 9 Jul 2017 23:16:36 -0700
Subject: [PATCH 032/294] Added MKL element-wise ops that utilize eigen ops as
 their back-end. Also added an input-conversion op that ensures that shapes of
 both input tensors are compatible (same or broadcastable). Added
 SquaredDifference to layout pass, and fixed the test for layout pass (it
 assumed Add/Mul/Sub would not be substituted with Mkl ops) Fixed missing edge
 deletion for 2 edges Fixing condition for checking broadcast Added more
 sanity checks Changed check for incoming control edges to code that moves
 control edges from the elementwise node to the inputconversion node. Fixed
 bug in CHECK for input types, which did not consider output number of source
 while checking datatype Fixed unit test bugs and added SquaredDifference
 elementwise op for CycleGAN Fixed merge issue (code duplication) Ran
 buildifier and clang-format on changed code Fixed a couple of merge issues
 and clang-format changes outside the modified code

---
 tensorflow/core/BUILD                         |   4 +
 .../core/common_runtime/mkl_cpu_allocator.h   |   4 +-
 tensorflow/core/graph/mkl_layout_pass.cc      | 132 +++++---
 tensorflow/core/graph/mkl_layout_pass_test.cc | 288 +++++++++---------
 .../core/graph/mkl_tfconversion_pass.cc       | 154 ++++++++++
 tensorflow/core/kernels/BUILD                 |  30 ++
 tensorflow/core/kernels/cwise_ops_common.cc   |   2 +
 .../core/kernels/mkl_cwise_ops_common.cc      |  88 ++++++
 tensorflow/core/kernels/mkl_identity_op.cc    |   4 +-
 .../core/kernels/mkl_input_conversion_op.cc   | 259 ++++++++++++++++
 .../{mkl_tfconv_op.cc => mkl_tfconv_op.h}     |  30 +-
 tensorflow/core/ops/math_ops.cc               |  76 +++++
 tensorflow/core/ops/nn_ops.cc                 |  23 ++
 tensorflow/core/util/mkl_util.h               | 125 ++++++++
 14 files changed, 1022 insertions(+), 197 deletions(-)
 create mode 100644 tensorflow/core/kernels/mkl_cwise_ops_common.cc
 create mode 100644 tensorflow/core/kernels/mkl_input_conversion_op.cc
 rename tensorflow/core/kernels/{mkl_tfconv_op.cc => mkl_tfconv_op.h} (80%)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 03aae9fb74..e6d17c93f8 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -777,8 +777,10 @@ cc_library(
     ]) + if_mkl([
         "//tensorflow/core/kernels:mkl_concat_op",
         "//tensorflow/core/kernels:mkl_conv_op",
+        "//tensorflow/core/kernels:mkl_cwise_ops_common",
         "//tensorflow/core/kernels:mkl_fused_batch_norm_op",
         "//tensorflow/core/kernels:mkl_identity_op",
+        "//tensorflow/core/kernels:mkl_input_conversion_op",
         "//tensorflow/core/kernels:mkl_lrn_op",
         "//tensorflow/core/kernels:mkl_pooling_ops",
         "//tensorflow/core/kernels:mkl_relu_op",
@@ -2466,8 +2468,10 @@ tf_cc_test_mkl(
         "//tensorflow/cc:sendrecv_ops",
         "//tensorflow/core/kernels:mkl_concat_op",
         "//tensorflow/core/kernels:mkl_conv_op",
+        "//tensorflow/core/kernels:mkl_cwise_ops_common",
         "//tensorflow/core/kernels:mkl_fused_batch_norm_op",
         "//tensorflow/core/kernels:mkl_identity_op",
+        "//tensorflow/core/kernels:mkl_input_conversion_op",
         "//tensorflow/core/kernels:mkl_lrn_op",
         "//tensorflow/core/kernels:mkl_pooling_ops",
         "//tensorflow/core/kernels:mkl_relu_op",
diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
index 005aabf9b8..f16da10d7a 100644
--- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h
+++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h
@@ -75,12 +75,12 @@ class MklCPUAllocator : public Allocator {
   // Hooks provided by this allocator for memory allocation routines from MKL
 
   static inline void* MallocHook(size_t size) {
-    VLOG(2) << "MklCPUAllocator: In MallocHook";
+    VLOG(3) << "MklCPUAllocator: In MallocHook";
     return cpu_allocator()->AllocateRaw(kAlignment, size);
   }
 
   static inline void FreeHook(void* ptr) {
-    VLOG(2) << "MklCPUAllocator: In FreeHook";
+    VLOG(3) << "MklCPUAllocator: In FreeHook";
     cpu_allocator()->DeallocateRaw(ptr);
   }
 
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 2f9ceaa3bd..fcb38fa1e0 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -279,17 +279,26 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
     csinfo_.mkl_conv2d_with_bias_backprop_bias =
                                    "_MklConv2DWithBiasBackpropBias";
-    csinfo_.relu                  = "Relu";
-    csinfo_.relu_grad             = "ReluGrad";
-    csinfo_.reshape               = "Reshape";
-    csinfo_.split                 = "Split";
+    csinfo_.relu = "Relu";
+    csinfo_.relu_grad = "ReluGrad";
+    csinfo_.reshape = "Reshape";
+    csinfo_.split = "Split";
+    // Element-wise ops. Ensure you also add any new ops to IsOpElementWise
+    // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the
+    // MklInputConversion op is added before it.
+    csinfo_.add = "Add";
+    csinfo_.maximum = "Maximum";
+    csinfo_.mul = "Mul";
+    csinfo_.sub = "Sub";
+    csinfo_.squared_difference = "SquaredDifference";
+    // End - element-wise ops. See note above.
 
     // NOTE: names are alphabetically sorted.
     rinfo_.push_back({csinfo_.avg_pool,
-                      GetMklOpName(csinfo_.avg_pool),
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
                       CopyAttrsPooling, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.avg_pool_grad,
-                      GetMklOpName(csinfo_.avg_pool_grad),
+                      mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad),
                       CopyAttrsPooling, AlwaysRewrite, nullptr});
     // BiasAddGrad gets written into Conv2DWithBiasBackpropBias depending
     // on if context contains Conv2D.
@@ -303,50 +312,66 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
                       CopyAttrsBiasAddGrad, ContextMatchRewrite,
                       &biasaddgrad_matmul_context_});
     rinfo_.push_back({csinfo_.concat,
-                      GetMklOpName(csinfo_.concat),
+                      mkl_op_registry::GetMklOpName(csinfo_.concat),
                       CopyAttrsConcat, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.concatv2,
-                      GetMklOpName(csinfo_.concatv2),
+                      mkl_op_registry::GetMklOpName(csinfo_.concatv2),
                       CopyAttrsConcatV2, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.conv2d,
-                      GetMklOpName(csinfo_.conv2d),
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d),
                       CopyAttrsConv2D, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.conv2d_grad_filter,
-                      GetMklOpName(csinfo_.conv2d_grad_filter),
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_filter),
                       CopyAttrsConv2D, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.conv2d_grad_input,
-                      GetMklOpName(csinfo_.conv2d_grad_input),
+                      mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input),
                       CopyAttrsConv2D, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.fused_batch_norm,
-                      GetMklOpName(csinfo_.fused_batch_norm),
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm),
                       CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.fused_batch_norm_grad,
-                      GetMklOpName(csinfo_.fused_batch_norm_grad),
+                      mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
                       CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.identity,
-                      GetMklOpName(csinfo_.identity),
+                      mkl_op_registry::GetMklOpName(csinfo_.identity),
                       CopyAttrsIdentity, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.lrn,
-                      GetMklOpName(csinfo_.lrn),
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn),
                       CopyAttrsLRN, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.lrn_grad,
-                      GetMklOpName(csinfo_.lrn_grad),
+                      mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
                       CopyAttrsLRN, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.max_pool,
-                      GetMklOpName(csinfo_.max_pool),
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool),
                       CopyAttrsPooling, NonDepthBatchWisePoolRewrite, nullptr});
     rinfo_.push_back({csinfo_.max_pool_grad,
-                      GetMklOpName(csinfo_.max_pool_grad),
+                      mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
                       CopyAttrsPooling, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.relu,
-                      GetMklOpName(csinfo_.relu),
-                      CopyAttrsRelu, AlwaysRewrite, nullptr});
+                      mkl_op_registry::GetMklOpName(csinfo_.relu),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.relu_grad,
-                      GetMklOpName(csinfo_.relu_grad),
-                      CopyAttrsRelu, AlwaysRewrite, nullptr});
+                      mkl_op_registry::GetMklOpName(csinfo_.relu_grad),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.reshape,
-                      GetMklOpName(csinfo_.reshape),
+                      mkl_op_registry::GetMklOpName(csinfo_.reshape),
                       CopyAttrsReshape, AlwaysRewrite, nullptr});
+    // Element-wise ops
+    rinfo_.push_back({csinfo_.add,
+                      mkl_op_registry::GetMklOpName(csinfo_.add),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.sub,
+                      mkl_op_registry::GetMklOpName(csinfo_.sub),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.mul,
+                      mkl_op_registry::GetMklOpName(csinfo_.mul),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.maximum,
+                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.squared_difference,
+                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
 
     // Add info about which ops to add workspace edge to and the slots.
     wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
@@ -455,6 +480,11 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     string relu_grad;
     string reshape;
     string split;
+    string sub;
+    string add;
+    string mul;
+    string maximum;
+    string squared_difference;
   } ConstStringsInfo;
 
  private:
@@ -502,15 +532,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     return N;
   }
 
-  // Get the name of Mkl op from original TensorFlow op
-  // We prefix 'Mkl' to the original op to get Mkl op.
-  // TODO(nhasabni) We should move this to mkl_util.h.
-  inline string GetMklOpName(const string& name) const {
-    // Prefix that we add to Tensorflow op name to construct Mkl op name.
-    const char* const kMklOpPrefix = "_Mkl";
-    return string(kMklOpPrefix) + name;
-  }
-
   // Can op represented by node 'n' run on DEVICE_CPU?
   // Op can run on CPU with MKL if the runtime assigned device or the
   // user requested device contains device CPU, or both are empty.
@@ -915,7 +936,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   static void CopyAttrsIdentity(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsRelu(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
 
@@ -1282,7 +1303,7 @@ void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
   TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
   for (auto ws : wsinfo_) {
     if (orig_node->type_string() == ws.fwd_op &&
-        mkl_op_registry::IsMklOp(GetMklOpName(orig_node->type_string()), T)) {
+        mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(orig_node->type_string()), T)) {
       // If this op is a fwd op, then we need to check if there is an
       // edge from this node's fwd_slot to bwdop's bwd_slot. If there is
       // an edge, then we just add an attribute on this node for setting
@@ -1308,7 +1329,7 @@ void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
         nb->Attr("workspace_enabled", false);
       }
     } else if (orig_node->type_string() == ws.bwd_op &&
-               mkl_op_registry::IsMklOp(GetMklOpName(orig_node->type_string()),
+               mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(orig_node->type_string()),
                                         T)) {
       // If this op is a bwd op, then we need to add workspace edge and
       // it's Mkl tensor edge between its corresponding fwd op and this
@@ -1324,7 +1345,7 @@ void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded(
         if (e->src_output() == ws.fwd_slot &&
             // We would have rewritten the forward op, so we need to use
             // GetMklOpName call to get its Mkl name.
-            e->src()->type_string() == GetMklOpName(ws.fwd_op) &&
+            e->src()->type_string() == mkl_op_registry::GetMklOpName(ws.fwd_op) &&
             e->dst_input() == ws.bwd_slot) {
           nb->Attr("workspace_enabled", true);
           CHECK_NOTNULL(ws_tensors);
@@ -1475,8 +1496,8 @@ void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node,
   nb->Attr("data_format", data_format);
 }
 
-void MklLayoutRewritePass::CopyAttrsRelu(const Node* orig_node,
-                                         NodeBuilder* nb) {
+void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node,
+                                             NodeBuilder* nb) {
   DataType T;
 
   // Get all attributes from old node.
@@ -1842,7 +1863,15 @@ Status MklLayoutRewritePass::RewriteNode(std::unique_ptr* g,
   }
 
   // Get all inputs.
-  const int num_inputs = orig_node->in_edges().size();
+  int num_inputs = orig_node->in_edges().size();
+
+  // Drop count for control edges from inputs
+  for (const Edge* e : orig_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      num_inputs--;
+    }
+  }
+
   gtl::InlinedVector control_edges;
   gtl::InlinedVector, 4> inputs(num_inputs);
   FillInputs(orig_node, &control_edges, &inputs);
@@ -1956,7 +1985,30 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
 
   // BiasAddGrad is not an Mkl layer, so we make an exception for it.
   if (n->type_string() != csinfo_.bias_add_grad) {
-    if (!mkl_op_registry::IsMklOp(GetMklOpName(n->type_string()), T)) {
+    if (!mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(n->type_string()), T)) {
+      return nullptr;
+    }
+  }
+
+  // For elementwise node, we reuse the Eigen implementation and pass the MKL
+  // metadata tensor through so we can avoid conversions. However, if all
+  // incoming edges are in TF format, we don't need all this overhead, so
+  // replace the elementwise node only if at least one of its parents is a MKL
+  // node.
+  if (mkl_op_registry::IsMklElementWiseOp(
+          mkl_op_registry::GetMklOpName(n->type_string()), T)) {
+    bool incoming_mkl_edge = false;
+    for (auto parent : n->in_edges()) {
+      if (mkl_op_registry::IsMklOp(
+              mkl_op_registry::GetMklOpName(parent->src()->type_string()), T)) {
+        incoming_mkl_edge = true;
+      } else {
+        VLOG(1) << "Non-MKL parent is: " << parent->src()->type_string();
+      }
+    }
+    if (incoming_mkl_edge == false) {
+      VLOG(1) << "Skipping replacement of elementwise node which has no MKL "
+                 "parents.";
       return nullptr;
     }
   }
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 482e339802..1445dfd4b2 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -133,19 +133,19 @@ TEST_F(MklLayoutPassTest, Basic) {
   InitGraph(
       "node { name: 'A' op: 'Input'}"
       "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Mul);D(Mul)|"
+            "A(Input);B(Input);C(Zeta);D(Zeta)|"
             "A->C;A->D;B->C:1;B->D:1");
 }
 
 // Test set 1: Conv2D + AddBias
 
-// C=_MklConv2D(A,M,B,N); E=BiasAdd(C,D); Z=Sub(E,Y) (for interleaved ordering)
-// C=_MklConv2D(A,B,M,N); E=BiasAdd(C,D); Z=Sub(E,Y) (for contiguous ordering)
+// C=_MklConv2D(A,M,B,N); E=BiasAdd(C,D); Z=Zeta(E,Y) (for interleaved ordering)
+// C=_MklConv2D(A,B,M,N); E=BiasAdd(C,D); Z=Zeta(E,Y) (for contiguous ordering)
 TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
   CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
   InitGraph(
@@ -166,18 +166,18 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['C', 'D'] }"
       "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Sub'"
+      "node { name: 'Z' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['E', 'Y']}");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);D(Input);DMT/_0(Const);E(_MklConv2DWithBias);"
-            "M(_MklInput);N(_MklInput);Y(Input);Z(Sub)|A->E;"
+            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->E;"
             "A:control->DMT/_0:control;B->E:1;D->E:2;DMT/_0->E:5;E->Z;M->E:3;"
             "N->E:4;Y->Z:1");
 }
 
-// C=_MklConv2D(A,M:1,B,N:1); E=BiasAdd(C,D); Z=Sub(E,Y) (for interleaved)
-// C=_MklConv2D(A,B,M:1,N:1); E=BiasAdd(C,D); Z=Sub(E,Y) (for contiguous)
+// C=_MklConv2D(A,M:1,B,N:1); E=BiasAdd(C,D); Z=Zeta(E,Y) (for interleaved)
+// C=_MklConv2D(A,B,M:1,N:1); E=BiasAdd(C,D); Z=Zeta(E,Y) (for contiguous)
 // Test for correct output slots selected
 TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive1) {
   CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
@@ -199,17 +199,17 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive1) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['C', 'D'] }"
       "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Sub'"
+      "node { name: 'Z' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['E', 'Y']}");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);D(Input);DMT/_0(Const);E(_MklConv2DWithBias);"
-            "M(_MklInput2);N(_MklInput2);Y(Input);Z(Sub)|A->E;"
+            "M(_MklInput2);N(_MklInput2);Y(Input);Z(Zeta)|A->E;"
             "A:control->DMT/_0:control;B->E:1;D->E:2;DMT/_0->E:5;E->Z;"
             "M:1->E:3;N:1->E:4;Y->Z:1");
 }
 
-// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Sub(E,Y);
+// C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y);
 // This is a case of node rewrite followed by node merge.
 // We will first rewrite Conv2D to _MklConv2D, and then merge _MklConv2D
 // with BiasAdd to produce _MklConv2DWithBias.
@@ -231,12 +231,12 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Positive2) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['C', 'D'] }"
       "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Sub'"
+      "node { name: 'Z' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['E', 'Y']}");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Sub)|"
+            "DMT/_2(Const);E(_MklConv2DWithBias);Y(Input);Z(Zeta)|"
             "A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
             "A:control->DMT/_2:control;B->E:1;D->E:2;DMT/_0->E:3;DMT/_1->E:4;"
             "DMT/_2->E:5;E->Z;Y->Z:1");
@@ -286,7 +286,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow1) {
             "M(_MklInput);N(_MklInput)|A->C;B->C:1;D->F;E->F:1;M->C:2;N->C:3");
 }
 
-// _MklConv2D has two outgoing edges: BiasAdd and some other dummy node (Add).
+// _MklConv2D has two outgoing edges: BiasAdd and some other dummy node (Zeta).
 // Merge should not be done in such case.
 TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
   InitGraph(
@@ -308,12 +308,12 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_Negative_Dataflow2) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['D', 'E'] }"  // Conv2D has two outputs.
                               // No merge should happen.
-      "node { name: 'G' op: 'Add'"
+      "node { name: 'G' op: 'Zeta'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " input: ['C', 'E'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(_MklConv2D);D(Input);E(Input);F(BiasAdd);"
-            "G(Add);M(_MklInput);N(_MklInput)|A->C;B->C:1;C->G;D->F;"
+            "G(Zeta);M(_MklInput);N(_MklInput)|A->C;B->C:1;C->G;D->F;"
             "E->F:1;E->G:1;M->C:2;N->C:3");
 }
 
@@ -362,7 +362,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Positive) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Sub'"
+      "node { name: 'E' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['D', 'A']}"
       "node { name: 'F' op: 'Int32Input'}"
@@ -387,7 +387,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Positive) {
       " input: ['E'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);DMT/_0(Const);"
-            "E(Sub);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
             "I(_MklConv2DBackpropInput);J(_MklConv2DWithBiasBackpropBias);"
             "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G;B->D:1;"
             "B->I:1;C->D:2;D->E;DMT/_0->J:1;E->G:2;E->I:2;E->J;"
@@ -413,7 +413,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative1) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Sub'"
+      "node { name: 'E' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['D', 'A']}"
       "node { name: 'F' op: 'Int32Input'}"
@@ -438,7 +438,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative1) {
       " input: ['E'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Sub);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
             "I(_MklConv2DBackpropInput);J(BiasAddGrad);"
             "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
             "B->I:1;C->D:2;D->E;E->G;E->I:2;E->J;F->G:1;H->I;M->D:3;M->G:3;"
@@ -463,7 +463,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative2) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['B', 'A', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Sub'"
+      "node { name: 'E' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['D', 'A']}"
       "node { name: 'F' op: 'Int32Input'}"
@@ -488,7 +488,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative2) {
       " input: ['E'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Sub);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(Int32Input);"
             "I(_MklConv2DBackpropInput);J(BiasAddGrad);"
             "M(_MklInput);N(_MklInput);O(_MklInput)|A->D:1;A->E:1;A->G;B->D;"
             "B->I:1;C->D:2;D->E;E->G:2;E->I:2;E->J;F->G:1;H->I;M->D:3;M->G:3;"
@@ -512,7 +512,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Positive) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Sub'"
+      "node { name: 'E' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['D', 'A']}"
       "node { name: 'F' op: 'Int32Input'}"
@@ -529,7 +529,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Positive) {
       " input: ['E'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);DMT/_0(Const);"
-            "E(Sub);F(Int32Input);G(_MklConv2DBackpropFilter);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);"
             "H(_MklConv2DWithBiasBackpropBias);M(_MklInput);N(_MklInput);"
             "O(_MklInput)|A->D;A->E:1;A->G;B->D:1;C->D:2;D->E;DMT/_0->H:1;"
             "E->G:2;E->H;E:control->DMT/_0:control;F->G:1;M->D:3;M->G:3;"
@@ -553,7 +553,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative1) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Sub'"
+      "node { name: 'E' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['D', 'A']}"
       "node { name: 'F' op: 'Int32Input'}"
@@ -570,7 +570,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative1) {
       " input: ['E'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Sub);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
             "M(_MklInput);N(_MklInput);O(_MklInput)|A->D;A->E:1;A->G:2;B->D:1;"
             "C->D:2;D->E;E->G;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
             "O->G:5");
@@ -593,7 +593,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative2) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['B', 'A', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Sub'"
+      "node { name: 'E' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['D', 'A']}"
       "node { name: 'F' op: 'Int32Input'}"
@@ -610,7 +610,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative2) {
       " input: ['E'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Sub);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
+            "E(Zeta);F(Int32Input);G(_MklConv2DBackpropFilter);H(BiasAddGrad);"
             "M(_MklInput);N(_MklInput);O(_MklInput)|A->D:1;A->E:1;A->G;B->D;"
             "C->D:2;D->E;E->G:2;E->H;F->G:1;M->D:3;M->G:3;N->D:4;N->G:4;O->D:5;"
             "O->G:5");
@@ -618,8 +618,8 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_BpropFilter_Negative2) {
 
 // No _MklConv2DWithBias in context, but _MklConv2D in context.
 // No rewrite for BiasAddGrad should happen.
-// C=_MklConv2D(A,M,B,N); D=Sub(C,A); E=BiasAddGrad(D) (for interleaved)
-// C=_MklConv2D(A,B,M,N); D=Sub(C,A); E=BiasAddGrad(D) (for contiguous)
+// C=_MklConv2D(A,M,B,N); D=Zeta(C,A); E=BiasAddGrad(D) (for interleaved)
+// C=_MklConv2D(A,B,M,N); D=Zeta(C,A); E=BiasAddGrad(D) (for contiguous)
 TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Neg_NoMklConv2DWithBias) {
   InitGraph(
       "node { name: 'A' op: 'Input'}"
@@ -633,7 +633,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Neg_NoMklConv2DWithBias) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B', 'M', 'N']}"
-      "node { name: 'D' op: 'Sub'"
+      "node { name: 'D' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['C', 'A']}"
       "node { name: 'E' op: 'BiasAddGrad'"
@@ -641,21 +641,21 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Neg_NoMklConv2DWithBias) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Sub);E(BiasAddGrad);"
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);"
             "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;"
             "M->C:2;N->C:3");
 }
 
 // No Conv2D in the context for BiasAddGrad. No rewrite should happen.
-// C=Add(A,B); D=Sub(C,A); E=BiasAddGrad(D)
+// C=Polygamma(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
 TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D) {
   InitGraph(
       "node { name: 'A' op: 'Input'}"
       "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Add'"
+      "node { name: 'C' op: 'Polygamma'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Sub'"
+      "node { name: 'D' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['C', 'A']}"
       "node { name: 'E' op: 'BiasAddGrad'"
@@ -663,13 +663,13 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Add);D(Sub);E(BiasAddGrad)|"
+            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
             "A->C;A->D:1;B->C:1;C->D;D->E");
 }
 
 // No Conv2D in the context for BiasAddGrad, but MatMul in context.
 // Rewrite should happen, but name of BiasAddGrad does not change.
-// C=MatMul(A,B); D=Sub(C,A); E=BiasAddGrad(D)
+// C=MatMul(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
 TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D_MatMul) {
   InitGraph(
       "node { name: 'A' op: 'Input'}"
@@ -679,7 +679,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D_MatMul) {
       " attr { key: 'transpose_a'      value { b: false } }"
       " attr { key: 'transpose_b'      value { b: false } }"
       " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Sub'"
+      "node { name: 'D' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['C', 'A']}"
       "node { name: 'E' op: 'BiasAddGrad'"
@@ -687,12 +687,12 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_Negative_NoConv2D_MatMul) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(MatMul);D(Sub);E(BiasAddGrad)|"
+            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
             "A->C;A->D:1;B->C:1;C->D;D->E");
 }
 
 // Test set 3: MatMul..BiasAddGrad -> BiasAddGrad rewrite tests
-// C=MatMul(A,B); D=Sub(C,A); E=BiasAddGrad(D)
+// C=MatMul(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
 TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Positive) {
   InitGraph(
       "node { name: 'A' op: 'Input'}"
@@ -702,7 +702,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Positive) {
       " attr { key: 'transpose_a'      value { b: false } }"
       " attr { key: 'transpose_b'      value { b: false } }"
       " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Sub'"
+      "node { name: 'D' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['C', 'A']}"
       "node { name: 'E' op: 'BiasAddGrad'"
@@ -710,20 +710,20 @@ TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Positive) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(MatMul);D(Sub);E(BiasAddGrad)|"
+            "A(Input);B(Input);C(MatMul);D(Zeta);E(BiasAddGrad)|"
             "A->C;A->D:1;B->C:1;C->D;D->E");
 }
 
 // No MatMul in the context for BiasAddGrad. No rewrite should happen.
-// C=Add(A,B); D=Sub(C,A); E=BiasAddGrad(D)
+// C=Polygamma(A,B); D=Zeta(C,A); E=BiasAddGrad(D)
 TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Negative_NoMatMul) {
   InitGraph(
       "node { name: 'A' op: 'Input'}"
       "node { name: 'B' op: 'Input'}"
-      "node { name: 'C' op: 'Add'"
+      "node { name: 'C' op: 'Polygamma'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Sub'"
+      "node { name: 'D' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['C', 'A']}"
       "node { name: 'E' op: 'BiasAddGrad'"
@@ -731,7 +731,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_MatMulBiasAddGrad_Negative_NoMatMul) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Add);D(Sub);E(BiasAddGrad)|"
+            "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|"
             "A->C;A->D:1;B->C:1;C->D;D->E");
 }
 
@@ -752,10 +752,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Basic) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['B', 'C'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklConv2D);D(Mul);DMT/_0(Const);"
+            "A(Input);B(Input);C(_MklConv2D);D(Zeta);DMT/_0(Const);"
             "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
             "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
             "DMT/_1->C:3");
@@ -781,11 +781,11 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'C']}"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(_MklConv2D);D(_MklConv2D);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Mul)|A->C;A->D;"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->C;A->D;"
             "A:control->DMT/_0:control;A:control->DMT/_1:control;"
             "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;"
             "C:1->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2");
@@ -803,10 +803,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_HALF } }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_HALF } }"
       " input: ['B', 'C'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(HalfInput);B(HalfInput);C(Conv2D);D(Mul)|"
+            "A(HalfInput);B(HalfInput);C(Conv2D);D(Zeta)|"
             "A->C;B->C:1;B->D;C->D:1");
 }
 
@@ -822,11 +822,11 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B', 'C']}"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);"
-            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Mul)|"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
             "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
             "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
             "DMT/_1->D:4;DMT/_2->D:5");
@@ -844,11 +844,11 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['B', 'A', 'C']}"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);"
-            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Mul)|"
+            "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|"
             "A->D:1;A->E;B->D;B:control->DMT/_0:control;"
             "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;"
             "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
@@ -869,11 +869,11 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) {
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " attr { key: 'N'                value { i: 2 } }"
       " input: ['A', 'B:0', 'B:1']}"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Mul)|A->D;A:control->DMT/_0:control;"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;"
             "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;"
             "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
 }
@@ -908,12 +908,12 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) {
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " attr { key: 'N'                value { i: 2 } }"
       " input: ['G', 'E', 'F']}"
-      "node { name: 'I' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'H'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
             "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
-            "F(_MklConv2D);G(Const);H(_MklConcat);I(Mul)|A->E;A->I;"
+            "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;"
             "A:control->DMT/_2:control;A:control->DMT/_3:control;"
             "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;"
             "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
@@ -935,7 +935,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D']}"
       "node { name: 'G' op: 'Const' "
       " attr { key: 'dtype' value { type: DT_INT32 } }"
@@ -946,12 +946,12 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) {
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " attr { key: 'N'                value { i: 2 } }"
       " input: ['G', 'E', 'F']}"
-      "node { name: 'I' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'H'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Mul);G(Const);"
-            "H(_MklConcat);I(Mul)|A->E;A->I;A:control->DMT/_0:control;"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
             "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
             "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:1->H:4;F->H:2;"
             "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1");
@@ -973,11 +973,11 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) {
       " attr { key: 'Tidx'             value { type: DT_INT32 } }"
       " attr { key: 'N'                value { i: 2 } }"
       " input: ['B:0', 'B:1', 'A']}"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(Mul)|A->D:2;B->D;B:1->D:1;"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;"
             "B:control->DMT/_0:control;B:control->DMT/_1:control;"
             "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;"
             "DMT/_1->D:4;DMT/_2->D:5");
@@ -1014,12 +1014,12 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) {
       " attr { key: 'Tidx'             value { type: DT_INT32 } }"
       " attr { key: 'N'                value { i: 2 } }"
       " input: ['E', 'F', 'G']}"
-      "node { name: 'I' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'H'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
             "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);"
-            "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Mul)|A->E;A->I;"
+            "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;"
             "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;"
             "C:control->DMT/_0:control;C:control->DMT/_1:control;"
             "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;"
@@ -1041,7 +1041,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B']}"
-      "node { name: 'F' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D']}"
       "node { name: 'G' op: 'Const' "
       " attr { key: 'dtype' value { type: DT_INT32 } }"
@@ -1053,12 +1053,12 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) {
       " attr { key: 'Tidx'             value { type: DT_INT32 } }"
       " attr { key: 'N'                value { i: 2 } }"
       " input: ['E', 'F', 'G']}"
-      "node { name: 'I' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'H'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Mul);G(Const);"
-            "H(_MklConcatV2);I(Mul)|A->E;A->I;A:control->DMT/_0:control;"
+            "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);"
+            "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;"
             "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;"
             "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:1->H:3;"
             "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;"
@@ -1071,10 +1071,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) {
       "node { name: 'B' op: 'Relu'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklRelu);C(Mul);DMT/_0(Const)|A->B;A->C;"
+            "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;"
             "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
 }
 
@@ -1085,10 +1085,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) {
       "node { name: 'C' op: 'ReluGrad'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'C'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(_MklReluGrad);D(Mul);DMT/_0(Const);"
+            "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
             "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;"
             "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3");
 }
@@ -1102,10 +1102,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) {
       "node { name: 'C' op: 'ReluGrad'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'C'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklRelu);C(_MklReluGrad);D(Mul);DMT/_0(Const);"
+            "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);"
             "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;"
             "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;"
             "DMT/_1->C:2");
@@ -1121,10 +1121,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklAvgPool);C(Mul);DMT/_0(Const)|A->B;A->C;"
+            "A(Input);B(_MklAvgPool);C(Zeta);DMT/_0(Const)|A->B;A->C;"
             "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
 }
 
@@ -1139,10 +1139,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolGrad_Positive) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
       " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['B', 'C'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Mul);DMT/_0(Const);"
+            "A(Int32Input);B(Input);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
             "DMT/_1(Const)|A->C;A:control->DMT/_0:control;"
             "A:control->DMT/_1:control;B->C:1;B->D;C->D:1;DMT/_0->C:2;"
             "DMT/_1->C:3");
@@ -1166,10 +1166,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_AvgPoolAvgPoolGrad_Positive) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
       " input: ['I', 'B'] }"
-      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'C'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Mul);DMT/_0(Const);"
+            "A(Input);B(_MklAvgPool);C(_MklAvgPoolGrad);D(Zeta);DMT/_0(Const);"
             "DMT/_1(Const);I(Int32Input)|A->B;A->D;A:control->DMT/_0:control;"
             "B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;DMT/_1->C:2;I->C;"
             "I:control->DMT/_1:control");
@@ -1188,12 +1188,12 @@ TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNormGrad_Positive) {
       " attr { key: 'epsilon'      value { f: 0.0001 } }"
       " attr { key: 'is_training'  value { b: true } }"
       " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'F'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
             "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
-            "F(_MklFusedBatchNormGrad);G(Mul)|A->F;A->G;"
+            "F(_MklFusedBatchNormGrad);G(Zeta)|A->F;A->G;"
             "A:control->DMT/_0:control;A:control->DMT/_1:control;"
             "A:control->DMT/_2:control;A:control->DMT/_3:control;"
             "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
@@ -1214,12 +1214,12 @@ TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_Positive) {
       " attr { key: 'epsilon'      value { f: 0.0001 } }"
       " attr { key: 'is_training'  value { b: true } }"
       " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'F'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
             "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Input);"
-            "F(_MklFusedBatchNorm);G(Mul)|A->F;A->G;"
+            "F(_MklFusedBatchNorm);G(Zeta)|A->F;A->G;"
             "A:control->DMT/_0:control;A:control->DMT/_1:control;"
             "A:control->DMT/_2:control;A:control->DMT/_3:control;"
             "A:control->DMT/_4:control;B->F:1;C->F:2;D->F:3;"
@@ -1268,12 +1268,12 @@ TEST_F(MklLayoutPassTest, MaxPoolLRN_Positive) {
       " attr { key: 'depth_radius' value { i: 2 } }"
       " input: ['E', 'F', 'B'] }"
       "node { name: 'H' op: 'Input'}"
-      "node { name: 'I' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['H', 'G'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
       "A(Input);B(_MklLRN);C(_MklMaxPool);D(Input);DMT/_0(Const);DMT/_1(Const);"
       "DMT/_2(Const);E(_MklMaxPoolGrad);F(Input);G(_MklLRNGrad);H(Input);"
-      "I(Mul)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;"
+      "I(Zeta)|A->B;A:control->DMT/_0:control;B->C;B->E;B->G:2;B:1->G:3;"
       "B:2->C:1;B:2->E:4;B:2->G:6;B:3->G:7;B:control->DMT/_1:control;C->E:1;"
       "C:1->E:3;C:2->E:5;C:3->E:7;D->E:2;DMT/_0->B:1;DMT/_1->E:6;DMT/_2->G:5;"
       "E->G;E:1->G:4;E:control->DMT/_2:control;F->G:1;G->I:1;H->I");
@@ -1301,11 +1301,11 @@ TEST_F(MklLayoutPassTest, LRN_Positive) {
       " attr { key: 'data_format'  value { s: 'NCHW' } }"
       " attr { key: 'depth_radius' value { i: 2 } }"
       " input: ['C', 'D', 'B'] }"
-      "node { name: 'F' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'E'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
-            "DMT/_2(Const);E(_MklLRNGrad);F(Mul)|"
+            "DMT/_2(Const);E(_MklLRNGrad);F(Zeta)|"
             "A->B;A:control->DMT/_0:control;B->E:2;B:1->E:3;B:2->E:6;B:3->E:7;"
             "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
             "D->E:1;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:5;E->F:1");
@@ -1323,10 +1323,10 @@ TEST_F(MklLayoutPassTest, LRN_Negative1) {
       " attr { key: 'data_format'  value { s: 'NCHW' } }"
       " attr { key: 'depth_radius' value { i: 2 } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklLRN);C(Mul);DMT/_0(Const)|"
+            "A(Input);B(_MklLRN);C(Zeta);DMT/_0(Const)|"
             "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
 }
 
@@ -1344,11 +1344,11 @@ TEST_F(MklLayoutPassTest, LRN_Negative2) {
       " attr { key: 'data_format'  value { s: 'NCHW' } }"
       " attr { key: 'depth_radius' value { i: 2 } }"
       " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(_MklLRNGrad);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Mul)|"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
             "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
             "A:control->DMT/_2:control;A:control->DMT/_3:control;"
             "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
@@ -1386,12 +1386,12 @@ TEST_F(MklLayoutPassTest, LRN_Negative3) {
       " attr { key: 'data_format'  value { s: 'NCHW' } }"
       " attr { key: 'depth_radius' value { i: 2 } }"
       " input: ['C', 'B', 'D'] }"
-      "node { name: 'G' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['E', 'F'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(_MklLRN);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);"
             "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);DMT/_5(Const);"
-            "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Mul)|A->B;"
+            "DMT/_6(Const);E(_MklLRNGrad);F(_MklLRNGrad);G(Zeta)|A->B;"
             "A:control->DMT/_0:control;B->E:2;"
             "B->F:1;B:1->E:3;B:2->E:6;B:2->F:5;B:3->E:7;C->E;C->F;"
             "C:control->DMT/_1:control;C:control->DMT/_2:control;"
@@ -1421,11 +1421,11 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Positive) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
       " input: ['C', 'B', 'D'] }"
-      "node { name: 'F' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'E'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(_MklMaxPool);C(Input);D(Input);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Mul)|"
+            "DMT/_1(Const);DMT/_2(Const);E(_MklMaxPoolGrad);F(Zeta)|"
             "A->B;A:control->DMT/_0:control;B->E:1;B:1->E:3;B:2->E:5;B:3->E:7;"
             "C->E;C->F;C:control->DMT/_1:control;C:control->DMT/_2:control;"
             "D->E:2;DMT/_0->B:1;DMT/_1->E:4;DMT/_2->E:6;E->F:1");
@@ -1444,10 +1444,10 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative1) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(_MklMaxPool);C(Mul);DMT/_0(Const)|"
+            "A(Input);B(_MklMaxPool);C(Zeta);DMT/_0(Const)|"
             "A->B;A->C;A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
 }
 
@@ -1466,11 +1466,11 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative2) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:2, i:2} } }"
       " input: ['A', 'B', 'C'] }"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'D'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(_MklMaxPoolGrad);DMT/_0(Const);"
-            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Mul)|"
+            "DMT/_1(Const);DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(Zeta)|"
             "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;"
             "A:control->DMT/_2:control;A:control->DMT/_3:control;"
             "A:control->DMT/_4:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;"
@@ -1489,10 +1489,10 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative3) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 // Test MaxPool handling for batch-wise pooling (NCHW)
@@ -1507,10 +1507,10 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative4) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 // Test MaxPool handling for depth-wise pooling (NHWC)
@@ -1525,10 +1525,10 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative5) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 // Test MaxPool handling for depth-wise pooling (NCHW)
@@ -1543,10 +1543,10 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative6) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:2, i:1, i:1} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 // Test MaxPool handling for batch-wise pooling (NHWC)
@@ -1561,10 +1561,10 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative7) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 // Test MaxPool handling for batch-wise pooling (NHWC)
@@ -1579,10 +1579,10 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative8) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 2, i:1, i:1, i:1} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 // Test MaxPool handling for depth-wise pooling (NHWC)
@@ -1597,10 +1597,10 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative9) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 // Test MaxPool handling for depth-wise pooling (NHWC)
@@ -1615,10 +1615,10 @@ TEST_F(MklLayoutPassTest, NodeWorkspace_MaxPool_Negative10) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:2} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }");
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 /////////////////////////////////////////////////////////////////////
@@ -1636,10 +1636,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_DeviceTest) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B']}"
-      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['B', 'C'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(Conv2D);D(Mul)|A->C;B->C:1;B->D;C->D:1");
+            "A(Input);B(Input);C(Conv2D);D(Zeta)|A->C;B->C:1;B->D;C->D:1");
 }
 
 TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
@@ -1657,7 +1657,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B', 'C', 'M', 'N', 'O']}"
-      "node { name: 'E' op: 'Sub'"
+      "node { name: 'E' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['D', 'A']}"
       "node { name: 'F' op: 'BiasAddGrad'"
@@ -1666,7 +1666,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DBackprop_DeviceTest) {
       " input: ['E'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(_MklConv2DWithBias);"
-            "E(Sub);F(BiasAddGrad);M(_MklInput);N(_MklInput);"
+            "E(Zeta);F(BiasAddGrad);M(_MklInput);N(_MklInput);"
             "O(_MklInput)|A->D;A->E:1;B->D:1;C->D:2;D->E;E->F;"
             "M->D:3;N->D:4;O->D:5");
 }
@@ -1683,10 +1683,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_DeviceTest) {
       " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
       " attr { key: 'padding'          value { s: 'SAME' } }"
       " input: ['A', 'B', 'C']}"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'D'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Mul)|"
+            "A(Input);B(Int32Input);C(Input);D(Conv2DBackpropFilter);E(Zeta)|"
             "A->D;A->E;B->D:1;C->D:2;D->E:1");
 }
 
@@ -1696,10 +1696,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Relu_DeviceTest) {
       "node { name: 'B' op: 'Relu'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Relu);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(Relu);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) {
@@ -1709,10 +1709,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_DeviceTest) {
       "node { name: 'C' op: 'ReluGrad'"
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }"
-      "node { name: 'D' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'C'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(Input);C(ReluGrad);D(Mul)|A->C;A->D;B->C:1;C->D:1");
+            "A(Input);B(Input);C(ReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1");
 }
 
 TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) {
@@ -1725,10 +1725,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_MaxPool_DeviceTest) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(MaxPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(MaxPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) {
@@ -1741,10 +1741,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_DeviceTest) {
       " attr { key: 'padding'      value { s: 'VALID' } }"
       " attr { key: 'strides'      value { list: {i: 1, i:1, i:1, i:1} } }"
       " input: ['A'] }"
-      "node { name: 'C' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'B'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Input);B(AvgPool);C(Mul)|A->B;A->C;B->C:1");
+            "A(Input);B(AvgPool);C(Zeta)|A->B;A->C;B->C:1");
 }
 
 // Concat Op test: Concat with no Mkl layer feeding it
@@ -1762,10 +1762,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_DeviceTest) {
       " attr { key: 'T'                value { type: DT_FLOAT } }"
       " attr { key: 'N'                value { i: 2 } }"
       " input: ['A', 'B:0', 'B:1']}"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(Concat);E(Mul)|A->D;"
+            "A(Const);B(InputList);C(Input);D(Concat);E(Zeta)|A->D;"
             "B->D:1;B:1->D:2;C->E;D->E:1");
 }
 
@@ -1784,10 +1784,10 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_DeviceTest) {
       " attr { key: 'Tidx'             value { type: DT_INT32 } }"
       " attr { key: 'N'                value { i: 2 } }"
       " input: ['B:0', 'B:1', 'A']}"
-      "node { name: 'E' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['C', 'D'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
-            "A(Const);B(InputList);C(Input);D(ConcatV2);E(Mul)|"
+            "A(Const);B(InputList);C(Input);D(ConcatV2);E(Zeta)|"
             "A->D:2;B->D;B:1->D:1;C->E;D->E:1");
 }
 
@@ -1804,11 +1804,11 @@ TEST_F(MklLayoutPassTest, NodeRewrite_FusedBatchNorm_DeviceTest) {
       " attr { key: 'epsilon'      value { f: 0.0001 } }"
       " attr { key: 'is_training'  value { b: true } }"
       " input: ['A', 'B', 'C', 'D', 'E'] }"
-      "node { name: 'G' op: 'Mul' attr { key: 'T' value { type: DT_FLOAT } }"
+      "node { name: 'G' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
       " input: ['A', 'F'] }", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(Input);D(Input);E(Input);"
-            "F(FusedBatchNorm);G(Mul)|A->F;A->G;B->F:1;C->F:2;D->F:3;"
+            "F(FusedBatchNorm);G(Zeta)|A->F;A->G;B->F:1;C->F:2;D->F:3;"
             "E->F:4;F->G:1");
 }
 
@@ -1832,12 +1832,12 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_DeviceTest) {
       " attr { key: 'data_format'      value { s: 'NCHW' } }"
       " input: ['C', 'D'] }"
       "node { name: 'Y' op: 'Input'}"
-      "node { name: 'Z' op: 'Sub'"
+      "node { name: 'Z' op: 'Zeta'"
       " attr {key: 'T'                 value { type: DT_FLOAT } }"
       " input: ['E', 'Y']}", kGPUDevice);
   EXPECT_EQ(DoMklLayoutOptimizationPass(),
             "A(Input);B(Input);C(_MklConv2D);D(Input);E(BiasAdd);"
-            "M(_MklInput);N(_MklInput);Y(Input);Z(Sub)|A->C;"
+            "M(_MklInput);N(_MklInput);Y(Input);Z(Zeta)|A->C;"
             "B->C:1;C->E;D->E:1;E->Z;M->C:2;N->C:3;Y->Z:1");
 }
 
@@ -1853,7 +1853,7 @@ static void BM_MklLayoutRewritePass(int iters, int op_nodes) {
   random::SimplePhilox rnd(&philox);
   for (int op = 0; op < op_nodes; op++) {
     s += strings::Printf(
-        "node { name: 'op%04d' op: 'Mul' attr { key: 'T' value { "
+        "node { name: 'op%04d' op: 'Zeta' attr { key: 'T' value { "
         "type: DT_FLOAT } } input: ['in%04d', 'in%04d' ] }",
         op, rnd.Uniform(10), rnd.Uniform(10));
   }
diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index 590b3d030f..1c7b101fd5 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -64,6 +64,10 @@ namespace tensorflow {
 // in the Mkl format. Non-compliant ops accept inputs and outputs in the
 // TensorFlow format.
 //
+// ADDENDUM: For element-wise ops, we may or may not need a conversion to
+// take place before we hit the op. For this, we add a new op before each
+// element-wise MKL op to deal with the inputs, called _MklInputConversion.
+// This pass has been enhanced to add this capability.
 class MklToTfConversionPass : public GraphOptimizationPass {
  public:
   MklToTfConversionPass() {}
@@ -87,6 +91,16 @@ class MklToTfConversionPass : public GraphOptimizationPass {
     return mkl_op_registry::IsMklOp(op_name, T);
   }
 
+  // Is the input Op supported by Mkl-specific layout AND
+  //  is it element-wise?
+  //
+  // @input op_name string of the op
+  // @input T Datatype to use for checking input op
+  // @return true if op is Mkl supported; false, otherwise.
+  inline bool IsMklElementWiseOp(const string& op_name, DataType T) const {
+    return mkl_op_registry::IsMklElementWiseOp(op_name, T);
+  }
+
   // Insert layout conversion node on the edge pointed by 'e' from graph 'g'.
   //
   // Edge will be deleted once a call to this function is successful.
@@ -96,6 +110,17 @@ class MklToTfConversionPass : public GraphOptimizationPass {
   // @return Success:OK() if insertion is successful, otherwise returns
   //         appropriate error status code.
   Status InsertConversionNodeOnEdge(std::unique_ptr* g, Edge*);
+
+  // For element-wise ops, we need to sanitize the inputs. For this, we add a
+  // new node at the input of the replacement element-wise node that checks
+  // the inputs and converts one/both of them as required. See the op code
+  // comments for details.
+  //
+  // Insert input conversion node as parent of 'n' from graph 'g'.
+  //
+  // @return Success:OK() if insertion is successful, otherwise returns
+  //         appropriate error status code.
+  Status InsertInputConversionNode(std::unique_ptr* g, Node*);
 };
 
 // We register MklToTf insertion for phase 2 in post-partition grouping
@@ -171,6 +196,92 @@ Status MklToTfConversionPass::InsertConversionNodeOnEdge(
   return Status::OK();
 }
 
+Status MklToTfConversionPass::InsertInputConversionNode(
+    std::unique_ptr* g, Node* n) {
+  CHECK_NOTNULL(n);
+
+  // Get the input nodes and edges
+  std::vector edges;
+  TF_CHECK_OK(n->input_edges(&edges));
+  if (edges.size() != 4) {
+    return Status(error::Code::INVALID_ARGUMENT,
+                  "MKL Binary Element-wise op should have exactly 2 data"
+                  " inputs and 2 metadata inputs");
+  }
+
+  // Sanity check: ensure that both inputs are of the expected type, and the
+  // same type as input type
+  CHECK_EQ(BaseType(edges[0]->src()->output_type(edges[0]->src_output())),
+           BaseType(edges[1]->src()->output_type(edges[1]->src_output())));
+  CHECK_EQ(BaseType(edges[0]->src()->output_type(edges[0]->src_output())),
+           BaseType(n->input_type(0)));
+
+  // Check ordering of edges
+  for (uint i = 0; i < 4; i++) {
+    CHECK_EQ((edges[i]->dst_input() == i), true);
+  }
+
+  // Build the conversion node and specify src as input.
+  Node* conversion_node = nullptr;
+
+  TF_CHECK_OK(
+      NodeBuilder((*g)->NewName("MklInputConversion"), "_MklInputConversion")
+          .Input(edges[0]->src(), edges[0]->src_output())
+          .Input(edges[1]->src(), edges[1]->src_output())
+          .Input(edges[2]->src(), edges[2]->src_output())
+          .Input(edges[3]->src(), edges[3]->src_output())
+          .Device(n->def().device())
+          .Attr("T", n->input_type(0))
+          .Finalize(&**g, &conversion_node));
+
+  CHECK_NOTNULL(conversion_node);
+
+  // Change the destination of any control edges to the InputConversion node
+  if (edges.size() != n->in_edges().size()) {
+    std::vector edges_to_remove;
+    for (const Edge* e : n->in_edges()) {
+      if (e->IsControlEdge()) {
+        CHECK_NOTNULL((*g)->AddControlEdge(e->src(), conversion_node));
+        edges_to_remove.push_back(e);
+      }
+    }
+    for (const Edge* e : edges_to_remove) {
+      (*g)->RemoveEdge(e);
+    }
+  }
+
+  string data_format;
+  if (GetNodeAttr(edges[0]->src()->def(), "data_format", &data_format) ==
+      Status::OK()) {
+    conversion_node->AddAttr("data_format", data_format);
+  }
+
+  // Get assigned device from destination node and apply it to conversion node.
+  // We want conversion node to be on the same device as the destination node.
+  conversion_node->set_assigned_device_name(n->assigned_device_name());
+
+  // Set the Mkl op label for this op.
+  conversion_node->AddAttr("_kernel", mkl_op_registry::kMklOpLabel);
+
+  // Now that we have added edges from src->conversion_node, let's add edge from
+  // output of conversion_node to the element-wise node.
+  CHECK_NOTNULL((*g)->AddEdge(conversion_node, 0, n, edges[0]->dst_input()));
+  CHECK_NOTNULL((*g)->AddEdge(conversion_node, 1, n, edges[1]->dst_input()));
+  CHECK_NOTNULL((*g)->AddEdge(conversion_node, 2, n, edges[2]->dst_input()));
+  CHECK_NOTNULL((*g)->AddEdge(conversion_node, 3, n, edges[3]->dst_input()));
+
+  VLOG(1) << "MklToTfConversionPass - InputConversion: Inserting input "
+          << "conversion node on: " << n->type_string() << " successful.";
+
+  // Remove src->dst edge now.
+  (*g)->RemoveEdge(edges[0]);
+  (*g)->RemoveEdge(edges[1]);
+  (*g)->RemoveEdge(edges[2]);
+  (*g)->RemoveEdge(edges[3]);
+
+  return Status::OK();
+}
+
 bool MklToTfConversionPass::RunPass(std::unique_ptr* g) {
   bool result = false;
 
@@ -239,6 +350,49 @@ bool MklToTfConversionPass::RunPass(std::unique_ptr* g) {
 
   DumpGraph("After MklToTfConversionPass", &**g);
 
+  //---------------------------------------------------------------------------
+  // Check all nodes and add an input-conversion-node if the node is an mkl
+  // element-wise node.
+  VLOG(1) << "Before running MklToTfConversionPass - InputConversion";
+
+  std::vector candidate_nodes;
+  std::vector order;
+  GetReversePostOrder(**g, &order);  // This will give us topological sort.
+
+  for (Node* n : order) {
+    // If node is not an op or it does not have a datatype, then skip.
+    DataType datatype;
+    if (!n->IsOp() || (GetNodeAttr(n->def(), "T", &datatype) != Status::OK())) {
+      continue;
+    }
+    if (IsMklElementWiseOp(n->type_string(), datatype)) {
+      // If the input node is an input-conversion op, skip
+      Node* input_node = nullptr;
+      TF_CHECK_OK(n->input_node(0, &input_node));
+      DataType input_datatype;
+      if ((GetNodeAttr(n->def(), "T", &input_datatype) == Status::OK()) &&
+          (input_node->type_string().compare("_MklInputConversion") == 0)) {
+        continue;
+      }
+
+      VLOG(1) << "MklToTfConversionPass: InputConversion: Scheduled node "
+              << n->name() << " for inserting input conversion node";
+      candidate_nodes.push_back(const_cast(n));
+    }
+  }
+
+  // Process all candidate edges and insert conversion nodes on them.
+  for (Node* n : candidate_nodes) {
+    // Even if we insert conversion node on a single node, we
+    // need to return true.
+    if (InsertInputConversionNode(g, n) == Status::OK()) {
+      VLOG(1) << "MklToTfConversionPass: Inserted conversion "
+              << "on node " << n->name();
+      result = true;
+    }
+  }
+  DumpGraph("After MklToTfConversionPass - InputConversion", &**g);
+
   // We need to return true even if we insert one conversion node
   // anywhere in the graph.
   return result;
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index bba4171f92..8f2e2da784 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -5465,6 +5465,22 @@ tf_mkl_kernel_library(
     ],
 )
 
+tf_mkl_kernel_library(
+    name = "mkl_input_conversion_op",
+    hdrs = ["mkl_tfconv_op.h"],
+    prefix = "mkl_input_conversion",
+    deps = [
+        ":bounds_check",
+        ":ops_util",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:nn_ops_op_lib",
+        "//third_party/mkl:intel_binary_blob",
+    ],
+)
+
 tf_mkl_kernel_library(
     name = "mkl_pooling_ops",
     srcs = [
@@ -5539,6 +5555,20 @@ tf_mkl_kernel_library(
     ],
 )
 
+tf_mkl_kernel_library(
+    name = "mkl_cwise_ops_common",
+    hdrs = [
+        "cwise_ops.h",
+        "cwise_ops_common.h",
+        "cwise_ops_gradients.h",
+    ],
+    prefix = "mkl_cwise_ops_common",
+    deps = NN_DEPS + [
+        "cwise_op",
+        "//third_party/mkl:intel_binary_blob",
+    ],
+)
+
 cc_library(
     name = "dataset",
     srcs = ["dataset.cc"],
diff --git a/tensorflow/core/kernels/cwise_ops_common.cc b/tensorflow/core/kernels/cwise_ops_common.cc
index 192a4f732e..693c6467ac 100644
--- a/tensorflow/core/kernels/cwise_ops_common.cc
+++ b/tensorflow/core/kernels/cwise_ops_common.cc
@@ -20,7 +20,9 @@ namespace tensorflow {
 BinaryOpShared::BinaryOpShared(OpKernelConstruction* ctx, DataType out,
                                DataType in)
     : OpKernel(ctx) {
+#ifndef INTEL_MKL
   OP_REQUIRES_OK(ctx, ctx->MatchSignature({in, in}, {out}));
+#endif
 }
 
 void BinaryOpShared::SetUnimplementedError(OpKernelContext* ctx) {
diff --git a/tensorflow/core/kernels/mkl_cwise_ops_common.cc b/tensorflow/core/kernels/mkl_cwise_ops_common.cc
new file mode 100644
index 0000000000..7fc633c254
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_cwise_ops_common.cc
@@ -0,0 +1,88 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0(the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+// See docs in ../ops/math_ops.cc.
+
+#define EIGEN_USE_THREADS
+#include 
+#include 
+
+#include "tensorflow/core/kernels/cwise_ops_common.h"
+
+#include "tensorflow/core/util/mkl_util.h"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+template 
+class MklBinaryOp : public BinaryOp {
+ public:
+  explicit MklBinaryOp(OpKernelConstruction* context)
+      : BinaryOp(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    auto in0 = context->input(0);
+    auto in1 = context->input(1);
+    VLOG(1) << "Shapes (start mklbinaryop compute): "
+            << in0.shape().DebugString() << " _and_ "
+            << in1.shape().DebugString();
+
+    // Call the TensorFlow BinaryOp Compute method
+    BinaryOp::Compute(context);
+
+    auto out = context->mutable_output(0);
+    VLOG(1) << "Shapes (output): " << out->shape().DebugString();
+
+    // Pass input shape through to ouput shape
+    ForwardMklMetaDataInToOut(context, 0, 0);
+
+    out = context->mutable_output(0);
+    VLOG(1) << "Shapes (output): " << out->shape().DebugString();
+  }
+};
+
+//---------- Registration macros for various element-wise ops -----------
+// We will need to redefine "REGISTER" to include the mkl_op_registry flag
+#pragma push_macro("REGISTER")
+#undef REGISTER
+#define REGISTER(OP, D, N, F, T)                                    \
+  REGISTER_KERNEL_BUILDER(Name(N)                                   \
+                              .Device(DEVICE_##D)                   \
+                              .TypeConstraint("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          OP>);
+
+REGISTER5(MklBinaryOp, CPU, "_MklAdd", functor::add, float, Eigen::half, double,
+          int32, int64);
+REGISTER7(MklBinaryOp, CPU, "_MklSub", functor::sub, float, Eigen::half, double,
+          int32, int64, complex64, complex128);
+REGISTER5(MklBinaryOp, CPU, "_MklMul", functor::mul, float, Eigen::half, double,
+          uint8, int32);
+REGISTER5(MklBinaryOp, CPU, "_MklMaximum", functor::maximum, float, Eigen::half,
+          double, int32, int64);
+REGISTER5(MklBinaryOp, CPU, "_MklSquaredDifference",
+          functor::squared_difference, float, Eigen::half, double, int32,
+          int64);
+
+#undef REGISTER
+#pragma pop_macro("REGISTER")
+//-----------------------------------------------------------------------
+
+}  // end namespace tensorflow
+
+#endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl_identity_op.cc
index ca20294a26..f31e7afd46 100644
--- a/tensorflow/core/kernels/mkl_identity_op.cc
+++ b/tensorflow/core/kernels/mkl_identity_op.cc
@@ -41,9 +41,9 @@ class MklIdentityOp : public OpKernel {
     bool input_in_mkl_format = mkl_shape_input.IsMklTensor();
 
     if (input_in_mkl_format) {
-      ForwarMklTensorInToOut(context, 0, 0);
+      ForwardMklTensorInToOut(context, 0, 0);
     } else {
-      FowardTfTensorInToOut(context, 0, 0);
+      ForwardTfTensorInToOut(context, 0, 0);
     }
   }
 
diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc
new file mode 100644
index 0000000000..b58e44e398
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc
@@ -0,0 +1,259 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef INTEL_MKL
+
+#include 
+#include 
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+#include "tensorflow/core/kernels/mkl_tfconv_op.h"
+#include "tensorflow/core/util/mkl_util.h"
+
+namespace tensorflow {
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+///////////////////////////////////////////////////////////
+//               Op kernel
+// Checks and ensures that the 2 inputs are compatible for mkl binary ops.
+// Here's the basic logic:
+//
+// if both inputs are in TF format:
+//   pass the inputs through to the output
+// else if both inputs are in mkl format:
+//   if both have the same shape:
+//     pass the inputs through to the output
+// 	else:
+// 		convert both to TF
+// else if one is TF and one is MKL:
+// 	if broadcast is needed:
+// 		convert the MKL format input to TF format
+// 	else:
+// 		convert the TF format input to MKL format
+///////////////////////////////////////////////////////////
+
+template 
+class MklInputConversionOp : public OpKernel {
+ public:
+  explicit MklInputConversionOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES_OK(context, context->GetAttr("T", &op_data_type));
+    has_avx512f_ = port::TestCPUFeature(port::CPUFeature::AVX512F);
+  }
+
+ private:
+  void Compute(OpKernelContext* context) override {
+    // Check if input tensors are in MKL format.
+    const Tensor& input_tensor_0 = MklGetInput(context, 0);
+    MklShape input_shape_0;
+    GetMklShape(context, 0, &input_shape_0);
+
+    const Tensor& input_tensor_1 = MklGetInput(context, 1);
+    MklShape input_shape_1;
+    GetMklShape(context, 1, &input_shape_1);
+
+    bool tf_shapes_are_same = MklCompareShapes(&context->input(0).shape(),
+                                               &context->input(1).shape());
+
+    VLOG(1) << "MklInputConversionOp: Input shapes are "
+            << (tf_shapes_are_same ? "*same*" : "*different*") << ": "
+            << context->input(0).shape().DebugString() << " and "
+            << context->input(1).shape().DebugString();
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // if both inputs are in TF format, just copy input tensors to output.
+    if (!input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      VLOG(1) << "MklInputConversionOp: No conversion needed, "
+              << "copying TF inputs to output";
+
+      ForwardTfTensorInToOut(context, 0, 0);
+      ForwardTfTensorInToOut(context, 1, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // If both inputs are in MKL format
+    if (input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      // If both have the same shape, pass them through
+      if (tf_shapes_are_same) {
+        VLOG(1) << "MklInputConversionOp: No conversion needed, "
+                << "copying MKL inputs with identical shapes to output";
+
+        ForwardMklTensorInToOut(context, 0, 0);
+        ForwardMklTensorInToOut(context, 1, 1);
+        return;
+      }
+
+      // Sanity check
+      bool mkl_shapes_are_same =
+          MklCompareShapes(&input_shape_0, &input_shape_1);
+      if (mkl_shapes_are_same) {
+        CHECK(false) << "MklInputConversionOp: Unexpected: TF shapes are "
+                        "different but MKL shapes are same";
+      }
+
+      // Both have different shapes, so broadcast will be necessary.
+      // Convert to TF and pass both tensors through (we can't do broadcast
+      // with MKL tensors)
+      VLOG(1) << "MklInputConversionOp: Broadcast needed, "
+              << "converted MKL inputs to TF format";
+
+      MklToTfOp::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 0);
+      MklToTfOp::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_, 1);
+      SetDummyMklShapeOutput(context, 0);
+      SetDummyMklShapeOutput(context, 1);
+      return;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // One input is MKL and one is TF. If no broadcast is needed, convert
+    // the TF tensor to MKL, otherwise convert the MKL tensor to TF format
+    VLOG(1) << "MklInputConversionOp: Inputs in different formats (MKL/TF)";
+
+    const Tensor* mkl_tensor;
+    const MklShape* mkl_shape;
+    const Tensor* tf_tensor;
+    MklShape* tf_mkl_shape;
+    uint mkl_tensor_index;
+    uint tf_tensor_index;
+    if (input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_0;
+      mkl_shape = &input_shape_0;
+      mkl_tensor_index = 0;
+      tf_tensor = &input_tensor_1;
+      tf_mkl_shape = &input_shape_1;
+      tf_tensor_index = 1;
+    } else if (!input_shape_0.IsMklTensor() && input_shape_1.IsMklTensor()) {
+      mkl_tensor = &input_tensor_1;
+      mkl_shape = &input_shape_1;
+      mkl_tensor_index = 1;
+      tf_tensor = &input_tensor_0;
+      tf_mkl_shape = &input_shape_0;
+      tf_tensor_index = 0;
+    } else {
+      CHECK(false) << "MklInputConversionOp: Unexpected combination of input "
+                      "shapes for MKL "
+                   << "element-wise op";
+    }
+
+    // Broadcast is needed if the shapes are not the same
+    bool broadcast_needed;
+
+    size_t in0_size = 1;
+    for (size_t i = 0; i < mkl_shape->GetDimension(); ++i)
+      in0_size *= mkl_shape->tf_dim_size(i);
+
+    size_t in1_size = 1;
+    for (size_t i = 0; i < tf_tensor->shape().dims(); ++i)
+      in1_size *= tf_tensor->shape().dim_size(i);
+
+    broadcast_needed = (in0_size != in1_size);
+
+    if (!broadcast_needed) {
+      // Both shapes are same, convert the TF input to MKL
+      VLOG(1) << "MklInputConversionOp: No broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << tf_tensor_index
+              << " to MKL format";
+
+      // Create MklShape
+      Tensor* tensor_out;
+      MklShape mkl_output_mkl_shape;
+      mkl_output_mkl_shape.SetMklTensor(true);
+      mkl_output_mkl_shape.SetTfLayout(mkl_shape->GetDimension(),
+                                       mkl_shape->GetSizes(),
+                                       mkl_shape->GetStrides());
+      mkl_output_mkl_shape.SetTfDimOrder(mkl_shape->GetDimension());
+
+      // ** Temporarily borrow the layout from the MKL input **
+      mkl_output_mkl_shape.SetMklLayout(mkl_shape->GetCurLayout());
+
+      // Create output tensor
+      AllocateOutputSetMklShape(context, tf_tensor_index, &tensor_out,
+                                mkl_tensor->shape(), mkl_output_mkl_shape);
+
+      // Since the shapes are the same, use information from the other tensor
+      tf_mkl_shape->SetTfLayout(mkl_shape->GetDimension(),
+                                mkl_shape->GetSizes(), mkl_shape->GetStrides());
+      // Convert the data format
+      tf_mkl_shape->GetConvertedFlatData(
+          mkl_shape->GetCurLayout(),
+          const_cast(tf_tensor->flat().data()),
+          const_cast(tensor_out->flat().data()));
+
+      // ** Release the borrowed layout to avoid double deletion
+      //    in the destructor call **
+      mkl_output_mkl_shape.SetMklLayout(nullptr);
+
+      // -- The tensor in MKL format passes through --
+      ForwardMklTensorInToOut(context, mkl_tensor_index, mkl_tensor_index);
+    } else {
+      // Broadcast is needed, so convert the MKL input to TF
+      VLOG(1) << "MklInputConversionOp: Broadcast needed.";
+      VLOG(1) << "MklInputConversionOp: Converting input " << mkl_tensor_index
+              << " to TF format";
+      MklToTfOp::ConvertMklToTf(this, context, data_format_str,
+                                           op_data_type, has_avx512f_,
+                                           mkl_tensor_index);
+      SetDummyMklShapeOutput(context, mkl_tensor_index);
+
+      // The tensor in TF format passes through
+      ForwardTfTensorInToOut(context, tf_tensor_index, tf_tensor_index);
+    }
+
+    VLOG(1) << "MklInputConversionOp: Shapes (output): "
+            << context->mutable_output(0)->shape().DebugString() << " and "
+            << context->mutable_output(1)->shape().DebugString();
+
+    VLOG(1) << "MklInputConversion completed successfully.";
+  }
+
+ private:
+  /// Data format of the operation
+  string data_format_str;
+
+  /// Data type of the operation
+  DataType op_data_type;
+
+  /// CPUIDInfo
+  bool has_avx512f_ = false;
+};
+
+///////////////////////////////////////////////////////////
+//               Register kernel
+///////////////////////////////////////////////////////////
+
+#define REGISTER_CPU(T)                                             \
+  REGISTER_KERNEL_BUILDER(Name("_MklInputConversion")               \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklInputConversionOp);
+
+TF_CALL_NUMBER_TYPES(REGISTER_CPU);
+#undef REGISTER_CPU
+}  // namespace tensorflow
+#endif  // INTEL_MKL
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.cc b/tensorflow/core/kernels/mkl_tfconv_op.h
similarity index 80%
rename from tensorflow/core/kernels/mkl_tfconv_op.cc
rename to tensorflow/core/kernels/mkl_tfconv_op.h
index b48c735d12..a240ee44fb 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.cc
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -15,6 +15,9 @@ limitations under the License.
 
 #ifdef INTEL_MKL
 
+#ifndef TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
+#define TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
+
 #include 
 #include 
 #include "tensorflow/core/framework/numeric_op.h"
@@ -28,9 +31,9 @@ limitations under the License.
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/util/tensor_format.h"
 
-#include "tensorflow/core/util/mkl_util.h"
 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
+#include "tensorflow/core/util/mkl_util.h"
 
 namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -49,14 +52,22 @@ class MklToTfOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
+    ConvertMklToTf(this, context, data_format_str, op_data_type, has_avx512f_,
+                   0);
+    VLOG(1) << "MKLToTFConversion complete successfully.";
+  }
+
+  static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context,
+                             string data_format_str, DataType op_data_type,
+                             bool has_avx512f, uint input_number) {
     // Check that input tensor is in MKL format.
-    const Tensor& input_tensor = MklGetInput(context, 0);
+    const Tensor& input_tensor = MklGetInput(context, input_number);
     MklShape input_shape;
-    GetMklShape(context, 0, &input_shape);
+    GetMklShape(context, input_number, &input_shape);
 
     // if input is already in Tf format, then just copy input tensor to output.
     if (!input_shape.IsMklTensor()) {
-      context->set_output(0, input_tensor);
+      context->set_output(input_number, input_tensor);
       VLOG(1) << "MKLToTFConversion: No conversion needed, "
               << "copying input to output";
       return;
@@ -64,8 +75,8 @@ class MklToTfOp : public OpKernel {
 
     // Check that input data type is same as operator data type and that it is
     // same as output data type.
-    DataType input_data_type = input_type(0);
-    DataType output_data_type = output_type(0);
+    DataType input_data_type = op_kernel->input_type(input_number);
+    DataType output_data_type = op_kernel->output_type(input_number);
     CHECK_EQ(op_data_type, input_data_type);
     CHECK_EQ(op_data_type, output_data_type);
 
@@ -81,7 +92,7 @@ class MklToTfOp : public OpKernel {
     // Allocate output tensor.
     Tensor* output_tensor = NULL;
     OP_REQUIRES_OK(context,
-                   context->allocate_output(0, output_shape, &output_tensor));
+                   context->allocate_output(input_number, output_shape, &output_tensor));
 
     dnnLayout_t output_layout =
         static_cast(input_shape.GetTfLayout());
@@ -118,7 +129,8 @@ class MklToTfOp : public OpKernel {
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklToTfOp);
 
-TF_CALL_float(REGISTER_CPU);
+TF_CALL_NUMBER_TYPES(REGISTER_CPU);
 #undef REGISTER_CPU
 }  // namespace tensorflow
-#endif /* INTEL_MKL */
+#endif  // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_
+#endif  // INTEL_MKL
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
index 6ff05bd2a6..85751e1eb5 100644
--- a/tensorflow/core/ops/math_ops.cc
+++ b/tensorflow/core/ops/math_ops.cc
@@ -498,6 +498,24 @@ Returns x + y element-wise.
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
+REGISTER_OP("_MklAdd")
+    .Input("x: T")
+    .Input("y: T")
+    .Input("mkl_x: uint8")
+    .Input("mkl_y: uint8")
+    .Output("z: T")
+    .Output("mkl_z: uint8")
+    .Attr(
+        "T: {half, float, double, uint8, int8, int16, int32, int64, complex64, "
+        "complex128, string}")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Returns x + y element-wise.
+
+*NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");
+
 REGISTER_OP("Sub")
     .BINARY_FEWER()
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
@@ -508,6 +526,19 @@ Returns x - y element-wise.
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
+REGISTER_OP("_MklSub")
+    .BINARY_FEWER()
+    .Input("mkl_x: uint8")
+    .Input("mkl_y: uint8")
+    .Output("mkl_z: uint8")
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Returns x - y element-wise.
+
+*NOTE*: `Sub` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");
+
 REGISTER_OP("Mul")
     .BINARY_MORE()
     .SetIsCommutative()
@@ -519,6 +550,20 @@ Returns x * y element-wise.
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
+REGISTER_OP("_MklMul")
+    .BINARY_MORE()
+    .Input("mkl_x: uint8")
+    .Input("mkl_y: uint8")
+    .Output("mkl_z: uint8")
+    .SetIsCommutative()
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Returns x * y element-wise.
+
+*NOTE*: `Mul` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");
+
 REGISTER_OP("Div")
     .BINARY_MORE()
     .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
@@ -577,6 +622,20 @@ Returns (x - y)(x - y) element-wise.
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
+REGISTER_OP("_MklSquaredDifference")
+    .BINARY_FEWER()
+    .Input("mkl_x: uint8")
+    .Input("mkl_y: uint8")
+    .Output("mkl_z: uint8")
+    .SetIsCommutative()
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Returns (x - y)(x - y) element-wise.
+
+*NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");
+
 #undef BINARY_FEWER
 #undef BINARY_MORE
 
@@ -594,6 +653,23 @@ Returns the max of x and y (i.e. x > y ? x : y) element-wise.
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 )doc");
 
+REGISTER_OP("_MklMaximum")
+    .Input("x: T")
+    .Input("y: T")
+    .Input("mkl_x: uint8")
+    .Input("mkl_y: uint8")
+    .Output("z: T")
+    .Output("mkl_z: uint8")
+    .Attr("T: {half, float, double, int32, int64}")
+    .SetIsCommutative()
+    .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn)
+    .Doc(R"doc(
+Returns the max of x and y (i.e. x > y ? x : y) element-wise.
+
+*NOTE*: `Maximum` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+)doc");
+
 REGISTER_OP("Minimum")
     .Input("x: T")
     .Input("y: T")
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index 0a96258dd1..b56e73545a 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -3230,6 +3230,29 @@ REGISTER_OP("_MklToTf")
     .Doc(R"doc(
 MKL operator to convert a tensor from MKL layout to TensorFlow layout.
 
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklInputConversion")
+    .Input("input_0: T")
+    .Input("input_1: T")
+    .Input("mkl_input_0: uint8")
+    .Input("mkl_input_1: uint8")
+    .Output("output_0: T")
+    .Output("output_1: T")
+    .Output("mkl_output_0: uint8")
+    .Output("mkl_output_1: uint8")
+    // All datatypes supported by element-wise ops
+    .Attr(
+        "T: {half, float, double, uint8, int8, uint16, int16, int32, int64, "
+        "complex64, complex128}")
+    .Attr(GetConvnetDataFormatAttrString())
+    .Doc(R"doc(
+MKL operator to process the inputs to an elementwise MKL op. Both inputs
+need to be either in TF or in MKL format. This op is added before every
+element-wise MKL op.
+
 NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index cb22a50e8f..c703ecdeb1 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -65,6 +65,8 @@ class MklShape {
 
   void SetDimensions(const size_t dimension) { dimension_ = dimension; }
 
+  void SetMklLayout(dnnLayout_t mklLayout) { mklLayout_ = mklLayout; }
+
   void SetMklLayout(const void* primitive, size_t resourceType) {
     CHECK_EQ(
         dnnLayoutCreateFromPrimitive_F32(&mklLayout_, (dnnPrimitive_t)primitive,
@@ -135,6 +137,7 @@ class MklShape {
   size_t GetDimension() const { return dimension_; }
   const size_t* GetSizes() const { return sizes_; }
   int64 dim_size(int index) const { return sizes_[index]; }
+  int64 tf_dim_size(int index) const { return sizes_[tf_to_mkl_dim_map_[index]]; }
   const size_t* GetStrides() const { return strides_; }
   const size_t* GetTfToMklDimMap() const { return tf_to_mkl_dim_map_; }
   size_t tf_dim_idx(int index) const { return tf_to_mkl_dim_map_[index]; }
@@ -616,6 +619,98 @@ inline void ForwarMklTensorInToOut(OpKernelContext* context,
   }
 }
 
+// Forward the MKL shape ONLY (used in elementwise and other ops where
+// we call the eigen implementation and MKL shape is not used)
+inline void ForwardMklMetaDataInToOut(OpKernelContext* context,
+                                      uint idx_data_in, uint idx_data_out) {
+  uint idx_meta_in = GetTensorMetaDataIndex(idx_data_in, context->num_inputs());
+  uint idx_meta_out =
+      GetTensorMetaDataIndex(idx_data_out, context->num_outputs());
+
+  if (IsRefType(context->input_dtype(idx_data_in))) {
+    context->forward_ref_input_to_ref_output(idx_meta_in, idx_meta_out);
+  } else {
+    context->set_output(idx_meta_out, context->input(idx_meta_in));
+  }
+}
+
+// Set a dummy MKL shape (called when the output is in TF format)
+inline void SetDummyMklShapeOutput(OpKernelContext* context,
+                                   uint idx_data_out) {
+  MklShape mkl_shape_output;
+  mkl_shape_output.SetMklTensor(false);
+  AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output);
+}
+
+// Checks if the TF shape for both MKL tensors is the same or not
+// Returns: true if both TF shapes are the same, false otherwise
+inline bool MklCompareShapes(const MklShape* input_shape_0,
+                             const MklShape* input_shape_1) {
+  // Check for number of dimensions
+  if (input_shape_0->GetDimension() != input_shape_1->GetDimension()) {
+    return false;
+  }
+
+  // Check size of each dimension
+  size_t ndims = input_shape_0->GetDimension();
+  for (size_t i = 0; i < ndims; i++) {
+    if (input_shape_0->dim_size(i) != input_shape_1->dim_size(i)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Checks if the TF shape for both tensors is the same or not
+// Returns: true if TF shapes for both are the same, false otherwise
+inline bool MklCompareShapes(const MklShape* input_shape_0,
+                             const TensorShape* input_shape_1) {
+  // Check for number of dimensions
+  if (input_shape_0->GetDimension() != input_shape_1->dims()) {
+    return false;
+  }
+
+  // Check size of each dimension
+  size_t ndims = input_shape_0->GetDimension();
+  for (size_t i = 0; i < ndims; i++) {
+    if (input_shape_0->tf_dim_size(i) != input_shape_1->dim_size(i)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Checks if the TF shape for both tensors is the same or not
+// Returns: true if TF shapes for both are the same, false otherwise
+inline bool MklCompareShapes(const TensorShape* input_shape_0,
+                             const MklShape* input_shape_1) {
+  return MklCompareShapes(input_shape_1, input_shape_0);
+}
+
+// Checks if the TF shape for both tensors is the same or not
+// Returns: true if TF shapes for both are the same, false otherwise
+inline bool MklCompareShapes(const TensorShape* input_shape_0,
+                             const TensorShape* input_shape_1) {
+  // Check for number of dimensions
+  if (input_shape_0->dims() != input_shape_1->dims()) {
+    return false;
+  }
+
+  // Check size of each dimension
+  size_t ndims = input_shape_0->dims();
+  for (size_t i = 0; i < ndims; i++) {
+    if (input_shape_0->dim_size(i) != input_shape_1->dim_size(i)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// TODO(intel_tf): Remove this routine when faster MKL layout conversion is
+// out.
 inline void MklNHWCToNCHW(const Tensor& input, Tensor** output) {
   const float* buf_in = input.flat().data();
   float* buf_out = (*output)->flat().data();
@@ -652,6 +747,14 @@ namespace mkl_op_registry {
 static const char* kMklOpLabel = "MklOp";
 static const char* kMklOpLabelPattern = "label='MklOp'";
 
+// Get the name of Mkl op from original TensorFlow op
+// We prefix 'Mkl' to the original op to get Mkl op.
+inline string GetMklOpName(const string& name) {
+  // Prefix that we add to Tensorflow op name to construct Mkl op name.
+  const char* const kMklOpPrefix = "_Mkl";
+  return string(kMklOpPrefix) + name;
+}
+
 // Check whether opname with type T is registered as MKL-compliant.
 //
 // @input: name of the op
@@ -667,6 +770,28 @@ static inline bool IsMklOp(const std::string& op_name, DataType T) {
   return result;
 }
 
+// Check whether opname with type T is registered as MKL-compliant and
+// is element-wise.
+//
+// @input: name of the op
+// @input: T datatype to be used for checking op
+// @return: true if opname is registered as element-wise Mkl op
+static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) {
+  if (!IsMklOp(op_name, T)) {
+    return false;
+  }
+
+  bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
+                 0 == op_name.compare(GetMklOpName("Sub")) ||
+                 0 == op_name.compare(GetMklOpName("Mul")) ||
+                 0 == op_name.compare(GetMklOpName("Maximum")) ||
+                 0 == op_name.compare(GetMklOpName("SquaredDifference")));
+
+  VLOG(1) << "mkl_op_registry::" << op_name
+          << " is elementwise MKL op: " << result;
+  return result;
+}
+
 }  // namespace mkl_op_registry
 
 }  // namespace tensorflow
-- 
GitLab


From 4112af784d10fbfa33b8beb9b2f6873d3d55e701 Mon Sep 17 00:00:00 2001
From: Vivek Rane 
Date: Mon, 21 Aug 2017 17:26:34 -0700
Subject: [PATCH 033/294] Reverting accidentally reverted change (sigh)

---
 tensorflow/core/util/mkl_util.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index c703ecdeb1..92cb589d74 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -584,7 +584,7 @@ inline void CopyTfTensorInToOutWithShape(OpKernelContext* context,
   context->set_output(idx_data_out, output);
 }
 
-inline void FowardTfTensorInToOut(OpKernelContext* context,
+inline void ForwardTfTensorInToOut(OpKernelContext* context,
                                   int idx_in, int idx_out) {
   int num_inputs = context->num_inputs();
   int num_outputs = context->num_outputs();
@@ -601,7 +601,7 @@ inline void FowardTfTensorInToOut(OpKernelContext* context,
   }
 }
 
-inline void ForwarMklTensorInToOut(OpKernelContext* context,
+inline void ForwardMklTensorInToOut(OpKernelContext* context,
                                    int idx_in, int idx_out) {
   int num_inputs = context->num_inputs();
   int num_outputs = context->num_outputs();
-- 
GitLab


From aac157b921691482179964cf2b606b88ca6642a3 Mon Sep 17 00:00:00 2001
From: Vivek Rane 
Date: Fri, 25 Aug 2017 16:29:24 -0700
Subject: [PATCH 034/294] Addressed code review comments for sorting by op
 names in mkl_layout_pass

---
 tensorflow/core/graph/mkl_layout_pass.cc | 35 ++++++++++++------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index fcb38fa1e0..762c5426a4 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -289,11 +289,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     csinfo_.add = "Add";
     csinfo_.maximum = "Maximum";
     csinfo_.mul = "Mul";
-    csinfo_.sub = "Sub";
     csinfo_.squared_difference = "SquaredDifference";
+    csinfo_.sub = "Sub";
     // End - element-wise ops. See note above.
 
     // NOTE: names are alphabetically sorted.
+    rinfo_.push_back({csinfo_.add,
+                      mkl_op_registry::GetMklOpName(csinfo_.add),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.avg_pool,
                       mkl_op_registry::GetMklOpName(csinfo_.avg_pool),
                       CopyAttrsPooling, AlwaysRewrite, nullptr});
@@ -347,6 +350,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     rinfo_.push_back({csinfo_.max_pool_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad),
                       CopyAttrsPooling, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.maximum,
+                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
+    rinfo_.push_back({csinfo_.mul,
+                      mkl_op_registry::GetMklOpName(csinfo_.mul),
+                      CopyAttrsDataType, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.relu,
                       mkl_op_registry::GetMklOpName(csinfo_.relu),
                       CopyAttrsDataType, AlwaysRewrite, nullptr});
@@ -356,22 +365,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     rinfo_.push_back({csinfo_.reshape,
                       mkl_op_registry::GetMklOpName(csinfo_.reshape),
                       CopyAttrsReshape, AlwaysRewrite, nullptr});
-    // Element-wise ops
-    rinfo_.push_back({csinfo_.add,
-                      mkl_op_registry::GetMklOpName(csinfo_.add),
+    rinfo_.push_back({csinfo_.squared_difference,
+                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
                       CopyAttrsDataType, AlwaysRewrite, nullptr});
     rinfo_.push_back({csinfo_.sub,
                       mkl_op_registry::GetMklOpName(csinfo_.sub),
                       CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.mul,
-                      mkl_op_registry::GetMklOpName(csinfo_.mul),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.maximum,
-                      mkl_op_registry::GetMklOpName(csinfo_.maximum),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
-    rinfo_.push_back({csinfo_.squared_difference,
-                      mkl_op_registry::GetMklOpName(csinfo_.squared_difference),
-                      CopyAttrsDataType, AlwaysRewrite, nullptr});
 
     // Add info about which ops to add workspace edge to and the slots.
     wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3});
@@ -454,6 +453,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   /// Structure to store all constant strings
   /// NOTE: names are alphabetically sorted.
   typedef struct {
+    string add;
     string avg_pool;
     string avg_pool_grad;
     string bias_add;
@@ -471,20 +471,19 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     string matmul;
     string max_pool;
     string max_pool_grad;
+    string maximum;
     string mkl_conv2d;
     string mkl_conv2d_grad_input;
     string mkl_conv2d_grad_filter;
     string mkl_conv2d_with_bias;
     string mkl_conv2d_with_bias_backprop_bias;
+    string mul;
     string relu;
     string relu_grad;
     string reshape;
     string split;
-    string sub;
-    string add;
-    string mul;
-    string maximum;
     string squared_difference;
+    string sub;
   } ConstStringsInfo;
 
  private:
@@ -932,11 +931,11 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsIdentity(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
-  static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb);
 
-- 
GitLab


From 796313e74b089458b55450293bf5f652399915b7 Mon Sep 17 00:00:00 2001
From: Vivek Rane 
Date: Mon, 28 Aug 2017 11:56:26 -0700
Subject: [PATCH 035/294] Code review fixes - optimization for breaking out of
 loop early and comment fix

---
 tensorflow/core/graph/mkl_layout_pass.cc | 1 +
 tensorflow/core/util/mkl_util.h          | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 762c5426a4..d35fb82d77 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -2001,6 +2001,7 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
       if (mkl_op_registry::IsMklOp(
               mkl_op_registry::GetMklOpName(parent->src()->type_string()), T)) {
         incoming_mkl_edge = true;
+        break;
       } else {
         VLOG(1) << "Non-MKL parent is: " << parent->src()->type_string();
       }
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 92cb589d74..f4bec9524a 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -759,7 +759,7 @@ inline string GetMklOpName(const string& name) {
 //
 // @input: name of the op
 // @input: T datatype to be used for checking op
-// @return: true if opname is registered as Mkl op
+// @return: true if opname is registered as Mkl op; false otherwise
 static inline bool IsMklOp(const std::string& op_name, DataType T) {
   string kernel = KernelsRegisteredForOp(op_name);
   bool result =
@@ -775,7 +775,7 @@ static inline bool IsMklOp(const std::string& op_name, DataType T) {
 //
 // @input: name of the op
 // @input: T datatype to be used for checking op
-// @return: true if opname is registered as element-wise Mkl op
+// @return: true if opname is registered as element-wise Mkl op; false otherwise
 static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) {
   if (!IsMklOp(op_name, T)) {
     return false;
-- 
GitLab


From 5dcd2d1d338ec265dfb57a00f99e8b6c5a95037b Mon Sep 17 00:00:00 2001
From: Vivek Rane 
Date: Mon, 28 Aug 2017 14:09:42 -0700
Subject: [PATCH 036/294] Added more details about _MklInputConversionOp to the
 comments in the tfconversion pass

---
 tensorflow/core/graph/mkl_tfconversion_pass.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc
index 1c7b101fd5..3f8b0e86d0 100644
--- a/tensorflow/core/graph/mkl_tfconversion_pass.cc
+++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc
@@ -68,6 +68,11 @@ namespace tensorflow {
 // take place before we hit the op. For this, we add a new op before each
 // element-wise MKL op to deal with the inputs, called _MklInputConversion.
 // This pass has been enhanced to add this capability.
+// 
+// The _MklInputConversion op will check the inputs to the elementwise op and
+// make sure that either both are in MKL format or both are in TF format,
+// depending on their initial state and whether broadcast is needed or not.
+
 class MklToTfConversionPass : public GraphOptimizationPass {
  public:
   MklToTfConversionPass() {}
-- 
GitLab


From 383e833b5746bd2df2479316e5770e6c0c9131ee Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 14:28:06 -0700
Subject: [PATCH 037/294] Updating the cmake version of cub to reflect bazel
 version.

PiperOrigin-RevId: 166757645
---
 tensorflow/contrib/cmake/external/cub.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/cmake/external/cub.cmake b/tensorflow/contrib/cmake/external/cub.cmake
index a5ce0059df..477572d588 100644
--- a/tensorflow/contrib/cmake/external/cub.cmake
+++ b/tensorflow/contrib/cmake/external/cub.cmake
@@ -14,8 +14,8 @@
 # ==============================================================================
 include (ExternalProject)
 
-set(cub_URL http://mirror.bazel.build/github.com/NVlabs/cub/archive/1.6.4.zip)
-set(cub_HASH SHA256=966d0c4f41e2bdc81aebf9ccfbf0baffaac5a74f00b826b06f4dee79b2bb8cee)
+set(cub_URL http://mirror.bazel.build/github.com/NVlabs/cub/archive/69ceda618313df8e9cac6659d607b08949455d14.tar.gz)
+set(cub_HASH SHA256=87e856522c283b8ea887c3b61d7d5b252d2dd74abac4f1d756d776e721223e82)
 set(cub_BUILD ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub)
 set(cub_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub)
 set(cub_ARCHIVE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/cub_archive)
-- 
GitLab


From 70a2de19160b920df48eb40df741817dacb76f7b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 14:43:04 -0700
Subject: [PATCH 038/294] [XLA] Add reduce-precision-insertion pass options to
 address operation inputs.

We add options to reduce the precision of individual operation inputs before optimization, and to reduce the precision of inputs to fusion nodes.  This change also renames and significantly refactors the "pass timing" option enum, since it is no longer just about the pass timing.

PiperOrigin-RevId: 166760001
---
 .../compiler/xla/service/cpu/cpu_compiler.cc  |   7 +-
 .../compiler/xla/service/gpu/gpu_compiler.cc  |   7 +-
 .../xla/service/reduce_precision_insertion.cc | 263 ++++++++++++------
 .../xla/service/reduce_precision_insertion.h  |  70 ++++-
 .../reduce_precision_insertion_test.cc        | 227 +++++++++++++--
 .../xla/tests/reduce_precision_test.cc        |  10 +-
 tensorflow/compiler/xla/xla.proto             |  33 ++-
 7 files changed, 463 insertions(+), 154 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 1542237279..1e331c1cb8 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -257,7 +257,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module) {
 
   ReducePrecisionInsertion::AddPasses(
       &pipeline, module->config().debug_options(),
-      HloReducePrecisionOptions::BEFORE_OP_FUSION);
+      ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
 
   // TODO(b/35786417): Re-enable inliner pass after fixing the bug and deciding
   // where we will take this pass in future.
@@ -292,10 +292,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module) {
 
   ReducePrecisionInsertion::AddPasses(
       &pipeline, module->config().debug_options(),
-      HloReducePrecisionOptions::AFTER_OP_FUSION);
-  ReducePrecisionInsertion::AddPasses(
-      &pipeline, module->config().debug_options(),
-      HloReducePrecisionOptions::FUSION_BY_CONTENT);
+      ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
 
   pipeline.AddPass(
       module->mutable_entry_computation_layout());
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 70f2b573be..14054b3553 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -127,7 +127,7 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module,
     pipeline.AddInvariantChecker();
     ReducePrecisionInsertion::AddPasses(
         &pipeline, hlo_module->config().debug_options(),
-        HloReducePrecisionOptions::BEFORE_OP_FUSION);
+        ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
     {
       auto& pass =
           pipeline.AddPass>("simplification");
@@ -170,10 +170,7 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module,
     reduce_pipeline.AddInvariantChecker();
     ReducePrecisionInsertion::AddPasses(
         &reduce_pipeline, hlo_module->config().debug_options(),
-        HloReducePrecisionOptions::AFTER_OP_FUSION);
-    ReducePrecisionInsertion::AddPasses(
-        &reduce_pipeline, hlo_module->config().debug_options(),
-        HloReducePrecisionOptions::FUSION_BY_CONTENT);
+        ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
     StatusOr reduce_result = reduce_pipeline.Run(hlo_module);
     TF_RETURN_IF_ERROR(reduce_result.status());
 
diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc
index 353485c5a5..01dbb7e866 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc
@@ -21,52 +21,35 @@ limitations under the License.
 
 namespace xla {
 
-// For now, ReducePrecision is only implemented for F32 arrays, so this
-// ignores instructions that produce other data.  In particular, this
-// currently ignores instructions producing tuples, even if those tuples
-// contain F32 arrays inside them.  The assumption is that in most cases
-// equivalent behavior can be obtained by adding ReducePrecision
-// instructions after the instructions that pull the F32 arrays out of
-// the tuples.
-//
-// TODO(b/64093391): Remove the IsScalar check once this won't cause
-// failures on the GPU backend if the ReducePrecision instruction ends up
-// inserted between a scalar constant and the init_value argument of a
-// Reduce operation.
-std::vector ReducePrecisionInsertion::instructions_to_suffix(
+std::vector ReducePrecisionInsertion::instructions_to_modify(
     const HloComputation* computation) {
-  std::vector instructions_to_suffix;
+  std::vector instruction_list;
 
-  switch (pass_timing_) {
-    case HloReducePrecisionOptions::BEFORE_OP_FUSION:
-    case HloReducePrecisionOptions::AFTER_OP_FUSION:
+  switch (location_) {
+    case HloReducePrecisionOptions::OP_INPUTS:
+    case HloReducePrecisionOptions::OP_OUTPUTS:
+    case HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS:
       for (auto& instruction : computation->instructions()) {
         VLOG(4) << "Visited instruction: " << instruction->ToString();
-
-        if (instruction->shape().element_type() == PrimitiveType::F32 &&
-            !ShapeUtil::IsScalar(instruction->shape()) &&
-            instruction_filter_function_(instruction.get())) {
-          instructions_to_suffix.push_back(instruction.get());
+        if (instruction_filter_function_(instruction.get())) {
+          instruction_list.push_back(instruction.get());
         }
       }
       break;
 
-    case HloReducePrecisionOptions::FUSION_BY_CONTENT:
+    case HloReducePrecisionOptions::FUSION_INPUTS_BY_CONTENT:
+    case HloReducePrecisionOptions::FUSION_OUTPUTS_BY_CONTENT:
       for (auto& instruction : computation->instructions()) {
         VLOG(4) << "Visited instruction: " << instruction->ToString();
-
-        if (instruction->opcode() != HloOpcode::kFusion ||
-            instruction->shape().element_type() != PrimitiveType::F32 ||
-            ShapeUtil::IsScalar(instruction->shape())) {
+        if (instruction->opcode() != HloOpcode::kFusion) {
           continue;
         }
-
         for (auto& fused_instruction :
              instruction->fused_instructions_computation()->instructions()) {
           VLOG(4) << "Checking sub-instruction: "
                   << fused_instruction->ToString();
           if (instruction_filter_function_(fused_instruction.get())) {
-            instructions_to_suffix.push_back(instruction.get());
+            instruction_list.push_back(instruction.get());
             break;
           }
         }
@@ -76,70 +59,166 @@ std::vector ReducePrecisionInsertion::instructions_to_suffix(
     default:
       break;
   }
-  VLOG(1) << "Adding " << instructions_to_suffix.size()
-          << " reduce-precision operations.";
+  VLOG(1) << "Found " << instruction_list.size()
+          << " candidate instruction(s) for reduce-precision insertion";
 
-  return instructions_to_suffix;
+  return instruction_list;
 }
 
-StatusOr ReducePrecisionInsertion::Run(HloModule* module) {
-  bool changed = false;
-  VLOG(1) << "Running ReducePrecisionInsertion pass on " << module->name();
+StatusOr ReducePrecisionInsertion::insert_after(
+    HloInstruction* instruction) {
+  // Check that this isn't already an equivalent operation.
+  if (is_redundant(instruction)) {
+    VLOG(2) << "Skipped: instruction is already an equivalent"
+               " reduce-precision instruction:"
+            << instruction->ToString();
+    return false;
+  }
 
-  for (auto& computation : module->computations()) {
-    if (computation->IsFusionComputation()) {
-      continue;
+  // Check that we haven't already inserted an equivalant reduce-precision
+  // operation after this instruction.  (The zero-user case occurs when this is
+  // the root instruction.)
+  if (instruction->user_count() > 0) {
+    bool redundant_followers = true;
+    for (HloInstruction* user : instruction->users()) {
+      if (!is_redundant(user)) {
+        redundant_followers = false;
+        break;
+      }
+    }
+    if (redundant_followers) {
+      VLOG(2) << "Skipped: instruction already followed by equivalent"
+                 " reduce-precision instructions";
+      return false;
     }
+  }
 
-    bool computation_changed = false;
-    for (auto& instruction : instructions_to_suffix(computation.get())) {
-      VLOG(2) << "Adding reduce-precision operation to output of instruction: "
-              << instruction->ToString();
-
-      // Check that we haven't already inserted an equivalant reduce-precision
-      // operation after this instruction.
-      if (instruction->user_count() == 1) {
-        HloInstruction* user = instruction->users()[0];
-
-        if (user->opcode() == HloOpcode::kReducePrecision &&
-            user->exponent_bits() == exponent_bits_ &&
-            user->mantissa_bits() == mantissa_bits_) {
-          VLOG(2) << "Skipped; instruction already followed by equivalent"
-                     " reduce-precision instruction:"
-                  << user->ToString();
-          continue;
-        }
+  HloInstruction* reduced = instruction->parent()->AddInstruction(
+      HloInstruction::CreateReducePrecision(instruction->shape(), instruction,
+                                            exponent_bits_, mantissa_bits_));
+  TF_RETURN_IF_ERROR(
+      instruction->parent()->ReplaceUsesOfInstruction(instruction, reduced));
+  return true;
+}
+
+StatusOr ReducePrecisionInsertion::insert_on_inputs(
+    const std::vector& instructions) {
+  bool computation_changed = false;
+  for (auto instruction : instructions) {
+    VLOG(2) << "Adding reduce-precision operation to inputs of instruction: "
+            << instruction->ToString();
+    for (int64 i = 0; i < instruction->operand_count(); i++) {
+      HloInstruction* operand = instruction->mutable_operand(i);
+      VLOG(2) << "Adding to operand " << i << ": " << operand;
+
+      if (!is_valid_shape(operand->shape())) {
+        VLOG(2) << "Skipped: value is not an F32 vector";
+        continue;
+      }
+
+      if (is_redundant(operand)) {
+        VLOG(2) << "Skipped: operand is already an equivalent reduce-precision"
+                   " instruction";
+        continue;
       }
 
       if (instruction->opcode() == HloOpcode::kFusion) {
-        // Insert the reduce-precision operation as the last operation inside
-        // the fusion computation.
-        instruction = instruction->fused_expression_root();
-
-        VLOG(2) << "Inserting new operation after existing fusion root: "
-                << instruction->ToString();
-
-        if (instruction->opcode() == HloOpcode::kReducePrecision &&
-            instruction->exponent_bits() == exponent_bits_ &&
-            instruction->mantissa_bits() == mantissa_bits_) {
-          VLOG(2) << "Skipped; fused computation already ends in equivalent"
-                     " reduce-precision instruction:"
-                  << instruction->ToString();
-          continue;
+        // Insert the reduce-precision operation inside the fusion computation,
+        // after the corresponding parameter instruction.
+        TF_ASSIGN_OR_RETURN(
+            bool instruction_changed,
+            insert_after(instruction->fused_instructions_computation()
+                             ->parameter_instruction(i)));
+        computation_changed |= instruction_changed;
+      } else {
+        // Look for an existing reduce-precision operation on the operand.  (We
+        // need to be careful not to create a loop, though!)
+        HloInstruction* reduced = nullptr;
+        for (auto& user : operand->users()) {
+          if (user != instruction &&
+              user->opcode() == HloOpcode::kReducePrecision &&
+              user->exponent_bits() == exponent_bits_ &&
+              user->mantissa_bits() == mantissa_bits_) {
+            reduced = user;
+            break;
+          }
+        }
+        // If there wasn't an existing reduce-precision operation, create one.
+        if (!reduced) {
+          reduced = instruction->parent()->AddInstruction(
+              HloInstruction::CreateReducePrecision(
+                  operand->shape(), operand, exponent_bits_, mantissa_bits_));
         }
+        // Insert the reduce-precision operation before the operand.
+        TF_RETURN_IF_ERROR(instruction->ReplaceOperandWith(i, reduced));
+        computation_changed = true;
       }
+    }
+  }
+
+  return computation_changed;
+}
 
-      HloInstruction* reduced = instruction->parent()->AddInstruction(
-          HloInstruction::CreateReducePrecision(instruction->shape(),
-                                                instruction, exponent_bits_,
-                                                mantissa_bits_));
+StatusOr ReducePrecisionInsertion::insert_on_outputs(
+    const std::vector& instructions) {
+  bool computation_changed = false;
+  for (const auto& instruction : instructions) {
+    VLOG(2) << "Adding reduce-precision operation to output of instruction: "
+            << instruction->ToString();
 
-      TF_RETURN_IF_ERROR(instruction->parent()->ReplaceUsesOfInstruction(
-          instruction, reduced));
-      computation_changed = true;
+    if (!is_valid_shape(instruction->shape())) {
+      VLOG(2) << "Skipped: value is not an F32 nonscalar array";
+      continue;
     }
 
-    if (computation_changed) {
+    if (instruction->opcode() == HloOpcode::kFusion) {
+      // Insert the reduce-precision operation as the last operation inside
+      // the fusion computation.
+      HloInstruction* fusion_root = instruction->fused_expression_root();
+      VLOG(2) << "Inserting new operation after existing fusion root: "
+              << fusion_root->ToString();
+
+      TF_ASSIGN_OR_RETURN(bool instruction_changed, insert_after(fusion_root));
+      computation_changed |= instruction_changed;
+    } else {
+      // Insert the reduce-precision operation after the instruction.
+      TF_ASSIGN_OR_RETURN(bool instruction_changed, insert_after(instruction));
+      computation_changed |= instruction_changed;
+    }
+  }
+
+  return computation_changed;
+}
+
+StatusOr ReducePrecisionInsertion::Run(HloModule* module) {
+  bool changed = false;
+  VLOG(1) << "Running ReducePrecisionInsertion pass on " << module->name();
+
+  for (auto& computation : module->computations()) {
+    if (computation->IsFusionComputation()) {
+      continue;
+    }
+
+    StatusOr computation_changed;
+    switch (location_) {
+      case HloReducePrecisionOptions::OP_INPUTS:
+      case HloReducePrecisionOptions::FUSION_INPUTS_BY_CONTENT:
+        computation_changed = ReducePrecisionInsertion::insert_on_inputs(
+            instructions_to_modify(computation.get()));
+        break;
+
+      case HloReducePrecisionOptions::FUSION_OUTPUTS_BY_CONTENT:
+      case HloReducePrecisionOptions::OP_OUTPUTS:
+      case HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS:
+        computation_changed = ReducePrecisionInsertion::insert_on_outputs(
+            instructions_to_modify(computation.get()));
+        break;
+      default:
+        break;
+    }
+    TF_RETURN_IF_ERROR(computation_changed.status());
+
+    if (computation_changed.ValueOrDie()) {
       changed = true;
       VLOG(3) << "Computation after reduce-precision insertion:";
       XLA_VLOG_LINES(3, computation->ToString());
@@ -186,12 +265,12 @@ ReducePrecisionInsertion::make_filter_function(
 }
 
 HloReducePrecisionOptions ReducePrecisionInsertion::make_options_proto(
-    const HloReducePrecisionOptions::PassTiming pass_timing,
-    const int exponent_bits, const int mantissa_bits,
+    const HloReducePrecisionOptions::Location location, const int exponent_bits,
+    const int mantissa_bits,
     const std::function& opcode_filter_function,
     const std::vector& opname_substring_list) {
   HloReducePrecisionOptions options;
-  options.set_pass_timing(pass_timing);
+  options.set_location(location);
   options.set_exponent_bits(exponent_bits);
   options.set_mantissa_bits(mantissa_bits);
   for (uint32_t opcode = 0; opcode < HloOpcodeCount(); opcode++) {
@@ -205,13 +284,27 @@ HloReducePrecisionOptions ReducePrecisionInsertion::make_options_proto(
   return options;
 }
 
-bool ReducePrecisionInsertion::AddPasses(
-    HloPassPipeline* pipeline, const DebugOptions& debug_options,
-    const HloReducePrecisionOptions::PassTiming pass_timing) {
+bool ReducePrecisionInsertion::AddPasses(HloPassPipeline* pipeline,
+                                         const DebugOptions& debug_options,
+                                         const PassTiming pass_timing) {
   bool passes_added = false;
   for (const auto& pass_options :
        debug_options.hlo_reduce_precision_options()) {
-    if (pass_options.pass_timing() == pass_timing) {
+    bool add_pass;
+    switch (pass_options.location()) {
+      case HloReducePrecisionOptions::OP_INPUTS:
+      case HloReducePrecisionOptions::OP_OUTPUTS:
+        add_pass = pass_timing == PassTiming::BEFORE_OPTIMIZATION;
+        break;
+      case HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS:
+      case HloReducePrecisionOptions::FUSION_INPUTS_BY_CONTENT:
+      case HloReducePrecisionOptions::FUSION_OUTPUTS_BY_CONTENT:
+        add_pass = pass_timing == PassTiming::AFTER_FUSION;
+        break;
+      default:
+        add_pass = false;
+    }
+    if (add_pass) {
       pipeline->AddPass(pass_options);
       passes_added = true;
     }
diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.h b/tensorflow/compiler/xla/service/reduce_precision_insertion.h
index 6c5b8cdc0f..afde3cf95c 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion.h
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.h
@@ -39,11 +39,11 @@ class ReducePrecisionInsertion : public HloPassInterface {
   // function returns true and the output type is F32.
   explicit ReducePrecisionInsertion(
       const int exponent_bits, const int mantissa_bits,
-      const HloReducePrecisionOptions::PassTiming pass_timing,
+      const HloReducePrecisionOptions::Location location,
       const InstructionFilterFunction& instruction_filter_function)
       : exponent_bits_(exponent_bits),
         mantissa_bits_(mantissa_bits),
-        pass_timing_(pass_timing),
+        location_(location),
         instruction_filter_function_(instruction_filter_function) {}
 
   // Version of the constructor that takes an HloReducePrecisionOptions proto
@@ -53,7 +53,7 @@ class ReducePrecisionInsertion : public HloPassInterface {
       const HloReducePrecisionOptions& reduce_precision_options)
       : exponent_bits_(reduce_precision_options.exponent_bits()),
         mantissa_bits_(reduce_precision_options.mantissa_bits()),
-        pass_timing_(reduce_precision_options.pass_timing()),
+        location_(reduce_precision_options.location()),
         instruction_filter_function_(
             make_filter_function(reduce_precision_options)) {}
 
@@ -72,33 +72,79 @@ class ReducePrecisionInsertion : public HloPassInterface {
   static InstructionFilterFunction make_filter_function(
       const HloReducePrecisionOptions& reduce_precision_options);
   static HloReducePrecisionOptions make_options_proto(
-      const HloReducePrecisionOptions::PassTiming pass_timing,
+      const HloReducePrecisionOptions::Location location,
       const int exponent_bits, const int mantissa_bits,
       const std::function& opcode_filter_function,
       const std::vector& opname_substring_list = {});
 
+  // Enumeration to control which passes should be added.
+  enum class PassTiming { BEFORE_OPTIMIZATION, AFTER_FUSION };
+
   // Add ReducePrecisionInsertion passes to an HloPassPipeline based on the list
   // of HloReducePrecisionOptions in a DebugOptions proto.  Returns true if any
   // passes were added.
-  static bool AddPasses(
-      HloPassPipeline* pipeline, const DebugOptions& debug_options,
-      const HloReducePrecisionOptions::PassTiming pass_timing);
+  static bool AddPasses(HloPassPipeline* pipeline,
+                        const DebugOptions& debug_options,
+                        const PassTiming pass_timing);
 
  private:
-  // Select the instructions that should be suffixed with reduce-precision
-  // operators.
-  std::vector instructions_to_suffix(
+  // Select the instructions that should have reduce-precision operations
+  // attached to them.
+  std::vector instructions_to_modify(
       const HloComputation* computation);
 
+  // Insert a reduce-precision operation into the graph on the output of the
+  // given instruction.
+  StatusOr insert_after(HloInstruction* instruction);
+
+  // Insert reduce-precision operations into the graph on the inputs of the
+  // given instructions.  (For fusion instructions, the operations will be
+  // inserted inside the fusion computation, on the outputs of the relevant
+  // input parameters.)
+  StatusOr insert_on_inputs(
+      const std::vector& instructions);
+
+  // Insert reduce-precision operations into the graph on the outputs of the
+  // given instructions.  (For fusion instructions, the operations will be
+  // inserted inside the fusion computation as a new root.)
+  StatusOr insert_on_outputs(
+      const std::vector& instructions);
+
+  // Is this shape valid for inserting a reduce-precision operation?
+  bool is_valid_shape(const Shape& shape) {
+    // For now, ReducePrecision is only implemented for F32 arrays, so this
+    // ignores instructions that produce other data.  In particular, this
+    // currently ignores instructions producing tuples, even if those tuples
+    // contain F32 arrays inside them.  The assumption is that in most cases
+    // equivalent behavior can be obtained by adding ReducePrecision
+    // instructions after the instructions that pull the F32 arrays out of
+    // the tuples.
+    //
+    // TODO(b/64093391): Remove the IsScalar check once this won't cause
+    // failures on the GPU backend if the ReducePrecision instruction ends up
+    // inserted between a scalar constant and the init_value argument of a
+    // Reduce operation.
+    return shape.element_type() == PrimitiveType::F32 &&
+           !ShapeUtil::IsScalar(shape);
+  }
+
+  // Is this instruction one such that following or preceding it with a new
+  // reduce-precision operation will be redundant?
+  bool is_redundant(const HloInstruction* instruction) {
+    return instruction->opcode() == HloOpcode::kReducePrecision &&
+           instruction->exponent_bits() <= exponent_bits_ &&
+           instruction->mantissa_bits() <= mantissa_bits_;
+  }
+
   // Parameters for the precision reduction to be added.
   const int exponent_bits_;
   const int mantissa_bits_;
 
   // Pass "timing" parameter.  This also controls aspects of how the pass
   // selects locations to insert instructions.
-  const HloReducePrecisionOptions::PassTiming pass_timing_;
+  const HloReducePrecisionOptions::Location location_;
 
-  // Function to determine (from the opcode) whether a given instruction should
+  // User-provided Function to determine whether a given instruction should
   // have a reduce-precision instruction inserted in its output stream.
   const InstructionFilterFunction instruction_filter_function_;
 };
diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc
index d4640bec0e..064020896e 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc
@@ -35,16 +35,16 @@ using ::testing::UnorderedElementsAre;
 class ReducePrecisionInsertionTest : public HloTestBase {
  protected:
   bool InsertOps(HloModule* module,
-                 const HloReducePrecisionOptions::PassTiming pass_timing,
+                 const HloReducePrecisionOptions::Location location,
                  const std::function& filter) {
-    ReducePrecisionInsertion op_insertion(5, 10, pass_timing, filter);
+    ReducePrecisionInsertion op_insertion(5, 10, location, filter);
     StatusOr result = op_insertion.Run(module);
     EXPECT_IS_OK(result.status());
     return result.ValueOrDie();
   }
 };
 
-TEST_F(ReducePrecisionInsertionTest, RootInstruction) {
+TEST_F(ReducePrecisionInsertionTest, BeforeUnaryInstruction) {
   auto builder = HloComputation::Builder(TestName());
   Shape shape = ShapeUtil::MakeShape(F32, {4});
 
@@ -59,19 +59,141 @@ TEST_F(ReducePrecisionInsertionTest, RootInstruction) {
 
   // Confirm expected state before adding ops.
   EXPECT_EQ(computation->root_instruction(), b);
+  EXPECT_EQ(b->operand(0), a);
 
-  EXPECT_TRUE(InsertOps(module.get(),
-                        HloReducePrecisionOptions::BEFORE_OP_FUSION,
+  EXPECT_TRUE(InsertOps(module.get(), HloReducePrecisionOptions::OP_INPUTS,
                         [](const HloInstruction* instruction) {
                           return instruction->opcode() == HloOpcode::kCos;
                         }));
 
   // Confirm expected graph after adding ops.
-  EXPECT_THAT(computation->root_instruction(), op::ReducePrecision());
-  EXPECT_EQ(computation->root_instruction()->operand(0), b);
+  EXPECT_EQ(computation->root_instruction(), b);
+  EXPECT_THAT(b->operand(0), op::ReducePrecision(a));
 }
 
-TEST_F(ReducePrecisionInsertionTest, NonRootInstruction) {
+TEST_F(ReducePrecisionInsertionTest, BeforeBinaryInstruction) {
+  auto builder = HloComputation::Builder(TestName());
+  Shape shape = ShapeUtil::MakeShape(F32, {4});
+
+  // Create a simple graph with parameter feeding a binary add function.
+
+  HloInstruction* a =
+      builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "a"));
+  HloInstruction* b =
+      builder.AddInstruction(HloInstruction::CreateParameter(1, shape, "b"));
+  HloInstruction* c = builder.AddInstruction(
+      HloInstruction::CreateBinary(shape, HloOpcode::kAdd, a, b));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+
+  // Confirm expected state before adding ops.
+  EXPECT_EQ(computation->root_instruction(), c);
+  EXPECT_EQ(c->operand(0), a);
+  EXPECT_EQ(c->operand(1), b);
+
+  EXPECT_TRUE(InsertOps(module.get(), HloReducePrecisionOptions::OP_INPUTS,
+                        [](const HloInstruction* instruction) {
+                          return instruction->opcode() == HloOpcode::kAdd;
+                        }));
+
+  // Confirm expected graph after adding ops.
+  EXPECT_EQ(computation->root_instruction(), c);
+  EXPECT_THAT(c->operand(0), op::ReducePrecision(a));
+  EXPECT_THAT(c->operand(1), op::ReducePrecision(b));
+}
+
+TEST_F(ReducePrecisionInsertionTest, BeforeZeroInputInstruction) {
+  auto builder = HloComputation::Builder(TestName());
+  Shape shape = ShapeUtil::MakeShape(F32, {4});
+
+  // Create a simple graph with a parameter feeding a unary cosine function.
+  HloInstruction* a =
+      builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "a"));
+  HloInstruction* b = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kCos, a));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+
+  // Confirm expected state before adding ops.
+  EXPECT_EQ(computation->root_instruction(), b);
+  EXPECT_EQ(b->operand(0), a);
+
+  EXPECT_FALSE(InsertOps(module.get(), HloReducePrecisionOptions::OP_INPUTS,
+                         [](const HloInstruction* instruction) {
+                           return instruction->opcode() ==
+                                  HloOpcode::kParameter;
+                         }));
+
+  // Confirm that graph has not changed.
+  EXPECT_EQ(computation->root_instruction(), b);
+  EXPECT_EQ(b->operand(0), a);
+}
+
+TEST_F(ReducePrecisionInsertionTest, AvoidAddingDuplicateInstructions) {
+  auto builder = HloComputation::Builder(TestName());
+  Shape shape = ShapeUtil::MakeShape(F32, {4});
+
+  // Create a simple graph with parameter feeding a binary add function.
+
+  HloInstruction* a =
+      builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "a"));
+  HloInstruction* b = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kCos, a));
+  HloInstruction* c = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kSin, a));
+  HloInstruction* d = builder.AddInstruction(
+      HloInstruction::CreateBinary(shape, HloOpcode::kAdd, b, c));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+
+  // Confirm expected state before adding ops.
+  EXPECT_EQ(computation->root_instruction(), d);
+  EXPECT_EQ(b->operand(0), a);
+  EXPECT_EQ(c->operand(0), a);
+
+  EXPECT_TRUE(InsertOps(module.get(), HloReducePrecisionOptions::OP_INPUTS,
+                        [](const HloInstruction* instruction) {
+                          return instruction->opcode() == HloOpcode::kCos ||
+                                 instruction->opcode() == HloOpcode::kSin;
+                        }));
+
+  // Confirm expected graph after adding ops.  In particular, we want to confirm
+  // that the reduced-precision operation added for the input to b is re-used
+  // for the input to c.
+  EXPECT_THAT(b->operand(0), op::ReducePrecision(a));
+  EXPECT_THAT(c->operand(0), op::ReducePrecision(a));
+  EXPECT_EQ(b->operand(0), c->operand(0));
+}
+
+TEST_F(ReducePrecisionInsertionTest, AfterRootInstruction) {
+  auto builder = HloComputation::Builder(TestName());
+  Shape shape = ShapeUtil::MakeShape(F32, {4});
+
+  // Create a simple graph with a parameter feeding a unary cosine function.
+  HloInstruction* a =
+      builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "a"));
+  HloInstruction* b = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kCos, a));
+
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+
+  // Confirm expected state before adding ops.
+  EXPECT_EQ(computation->root_instruction(), b);
+
+  EXPECT_TRUE(InsertOps(module.get(), HloReducePrecisionOptions::OP_OUTPUTS,
+                        [](const HloInstruction* instruction) {
+                          return instruction->opcode() == HloOpcode::kCos;
+                        }));
+
+  // Confirm expected graph after adding ops.
+  EXPECT_THAT(computation->root_instruction(), op::ReducePrecision(b));
+}
+
+TEST_F(ReducePrecisionInsertionTest, AfterNonRootInstruction) {
   auto builder = HloComputation::Builder(TestName());
   Shape shape = ShapeUtil::MakeShape(F32, {4});
 
@@ -100,8 +222,7 @@ TEST_F(ReducePrecisionInsertionTest, NonRootInstruction) {
   EXPECT_EQ(c->operand(0), a_cos);
   EXPECT_EQ(c->operand(1), b_cos);
 
-  EXPECT_TRUE(InsertOps(module.get(),
-                        HloReducePrecisionOptions::BEFORE_OP_FUSION,
+  EXPECT_TRUE(InsertOps(module.get(), HloReducePrecisionOptions::OP_OUTPUTS,
                         [](const HloInstruction* instruction) {
                           return instruction->opcode() == HloOpcode::kCos;
                         }));
@@ -131,7 +252,7 @@ TEST_F(ReducePrecisionInsertionTest, OutputIsNotFloat) {
   // Since none of the instructions produce F32 data, this should not change
   // the graph.
   EXPECT_FALSE(
-      InsertOps(module.get(), HloReducePrecisionOptions::BEFORE_OP_FUSION,
+      InsertOps(module.get(), HloReducePrecisionOptions::OP_OUTPUTS,
                 [](const HloInstruction* instruction) { return true; }));
 
   // Confirm that graph has not changed.
@@ -157,7 +278,7 @@ TEST_F(ReducePrecisionInsertionTest, ShouldReduceOutputPrecisionIsFalse) {
   // Since none of the instructions match the should_reduce_output_precision
   // function, this should not change the graph.
   EXPECT_FALSE(
-      InsertOps(module.get(), HloReducePrecisionOptions::BEFORE_OP_FUSION,
+      InsertOps(module.get(), HloReducePrecisionOptions::OP_OUTPUTS,
                 [](const HloInstruction* instruction) { return false; }));
 
   // Confirm that graph has not changed.
@@ -181,18 +302,18 @@ TEST_F(ReducePrecisionInsertionTest, InsertionIsNotRecursive) {
 
   // This should insert a new ReducePrecision after the existing one, but
   // should not then recurse by adding another after the just-inserted one.
-  EXPECT_TRUE(
-      InsertOps(module.get(), HloReducePrecisionOptions::BEFORE_OP_FUSION,
-                [](const HloInstruction* instruction) {
-                  return instruction->opcode() == HloOpcode::kReducePrecision;
-                }));
+  EXPECT_TRUE(InsertOps(module.get(), HloReducePrecisionOptions::OP_OUTPUTS,
+                        [](const HloInstruction* instruction) {
+                          return instruction->opcode() ==
+                                 HloOpcode::kReducePrecision;
+                        }));
 
   // Confirm expected graph after adding ops.
   EXPECT_THAT(computation->root_instruction(), op::ReducePrecision());
   EXPECT_EQ(computation->root_instruction()->operand(0), b);
 }
 
-TEST_F(ReducePrecisionInsertionTest, SkipRedundantReducePrecision) {
+TEST_F(ReducePrecisionInsertionTest, SkipRedundantReducePrecisionAfter) {
   auto builder = HloComputation::Builder(TestName());
   Shape shape = ShapeUtil::MakeShape(F32, {4});
   HloInstruction* x =
@@ -209,11 +330,11 @@ TEST_F(ReducePrecisionInsertionTest, SkipRedundantReducePrecision) {
 
   // Since the new reduce-precision operation would be redundant, this
   // should not change the graph.
-  EXPECT_FALSE(
-      InsertOps(module.get(), HloReducePrecisionOptions::BEFORE_OP_FUSION,
-                [](const HloInstruction* instruction) {
-                  return instruction->opcode() == HloOpcode::kParameter;
-                }));
+  EXPECT_FALSE(InsertOps(module.get(), HloReducePrecisionOptions::OP_OUTPUTS,
+                         [](const HloInstruction* instruction) {
+                           return instruction->opcode() ==
+                                  HloOpcode::kParameter;
+                         }));
 
   // Confirm that graph has not changed.
   EXPECT_THAT(x->users(), UnorderedElementsAre(y));
@@ -237,8 +358,7 @@ TEST_F(ReducePrecisionInsertionTest, AddNonRedundantReducePrecision) {
 
   // Since the new reduce-precision operation is not the same as the existing
   // one, this should add a new one.
-  EXPECT_TRUE(InsertOps(module.get(),
-                        HloReducePrecisionOptions::BEFORE_OP_FUSION,
+  EXPECT_TRUE(InsertOps(module.get(), HloReducePrecisionOptions::OP_OUTPUTS,
                         [](const HloInstruction* instruction) {
                           return instruction->opcode() == HloOpcode::kParameter;
                         }));
@@ -273,7 +393,7 @@ TEST_F(ReducePrecisionInsertionTest, IgnoreOpsInsideFusionNode) {
   // The ReducePrecisionInsertion pass should not see inside the fusion
   // operation, so this should not change the graph.
   EXPECT_FALSE(InsertOps(module.get(),
-                         HloReducePrecisionOptions::AFTER_OP_FUSION,
+                         HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS,
                          [](const HloInstruction* instruction) {
                            return instruction->opcode() == HloOpcode::kCos;
                          }));
@@ -284,6 +404,53 @@ TEST_F(ReducePrecisionInsertionTest, IgnoreOpsInsideFusionNode) {
   EXPECT_EQ(z->fused_expression_root(), y_fused);
 }
 
+TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInHeadOfFusionNode) {
+  auto builder = HloComputation::Builder(TestName());
+  Shape shape = ShapeUtil::MakeShape(F32, {4});
+  HloInstruction* x =
+      builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "x"));
+  HloInstruction* y = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kCos, x));
+  auto module = CreateNewModule();
+  auto computation = module->AddEntryComputation(builder.Build());
+
+  // Manually fuse the kCos operation into a fusion operation.
+  HloInstruction* z = computation->AddInstruction(HloInstruction::CreateFusion(
+      shape, HloInstruction::FusionKind::kLoop, y));
+  EXPECT_IS_OK(computation->ReplaceUsesOfInstruction(y, z));
+  EXPECT_IS_OK(computation->RemoveInstruction(y));
+
+  // Confirm expected graph before adding reduce-precision ops.
+  EXPECT_THAT(x->users(), UnorderedElementsAre(z));
+  EXPECT_EQ(computation->root_instruction(), z);
+  HloInstruction* y_fused = z->fused_expression_root();
+  EXPECT_EQ(y_fused->opcode(), HloOpcode::kCos);
+
+  // This should see that the fusion computation contains a kCos operation,
+  // and insert a new reduce-precision node at its input.
+  EXPECT_TRUE(InsertOps(module.get(),
+                        HloReducePrecisionOptions::FUSION_INPUTS_BY_CONTENT,
+                        [](const HloInstruction* instruction) {
+                          return instruction->opcode() == HloOpcode::kCos;
+                        }));
+
+  // This should refuse to insert a second reduce-precision operation, as
+  // it would be redundant with the first.
+  EXPECT_FALSE(InsertOps(module.get(),
+                         HloReducePrecisionOptions::FUSION_INPUTS_BY_CONTENT,
+                         [](const HloInstruction* instruction) {
+                           return instruction->opcode() == HloOpcode::kCos;
+                         }));
+
+  // Confirm that the top-level computation still only contains the fusion
+  // instruction, but that the fused computation now has a reduce-precision
+  // instruction inserted after its parameter instruction.
+  EXPECT_THAT(x->users(), UnorderedElementsAre(z));
+  EXPECT_EQ(computation->root_instruction(), z);
+  EXPECT_THAT(z->fused_expression_root(), y_fused);
+  EXPECT_THAT(y_fused->operand(0), op::ReducePrecision(op::Parameter()));
+}
+
 TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInTailOfFusionNode) {
   auto builder = HloComputation::Builder(TestName());
   Shape shape = ShapeUtil::MakeShape(F32, {4});
@@ -309,7 +476,7 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInTailOfFusionNode) {
   // This should see that the fusion computation contains a kCos operation,
   // and insert a new reduce-precision node at its root.
   EXPECT_TRUE(InsertOps(module.get(),
-                        HloReducePrecisionOptions::FUSION_BY_CONTENT,
+                        HloReducePrecisionOptions::FUSION_OUTPUTS_BY_CONTENT,
                         [](const HloInstruction* instruction) {
                           return instruction->opcode() == HloOpcode::kCos;
                         }));
@@ -317,7 +484,7 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInTailOfFusionNode) {
   // This should refuse to insert a second reduce-precision operation, as
   // it would be redundant with the first.
   EXPECT_FALSE(InsertOps(module.get(),
-                         HloReducePrecisionOptions::FUSION_BY_CONTENT,
+                         HloReducePrecisionOptions::FUSION_OUTPUTS_BY_CONTENT,
                          [](const HloInstruction* instruction) {
                            return instruction->opcode() == HloOpcode::kCos;
                          }));
@@ -341,7 +508,7 @@ TEST_F(ReducePrecisionInsertionTest, MakeFilterFunctionNoSubstrings) {
       HloInstruction::CreateUnary(shape, HloOpcode::kSin, a));
 
   auto options_proto = ReducePrecisionInsertion::make_options_proto(
-      HloReducePrecisionOptions::BEFORE_OP_FUSION, 5, 10,
+      HloReducePrecisionOptions::OP_OUTPUTS, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kCos; });
 
   auto filter_function =
@@ -370,7 +537,7 @@ TEST_F(ReducePrecisionInsertionTest, MakeFilterFunctionWithSubstrings) {
   c->set_metadata(c_metadata);
 
   auto options_proto = ReducePrecisionInsertion::make_options_proto(
-      HloReducePrecisionOptions::BEFORE_OP_FUSION, 5, 10,
+      HloReducePrecisionOptions::OP_OUTPUTS, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kCos; },
       {"foo", "baz"});
 
diff --git a/tensorflow/compiler/xla/tests/reduce_precision_test.cc b/tensorflow/compiler/xla/tests/reduce_precision_test.cc
index c2a708e872..4756ba0968 100644
--- a/tensorflow/compiler/xla/tests/reduce_precision_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_precision_test.cc
@@ -270,7 +270,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, ReducePrecisionBeforeFusion) {
   auto reduce_precision_pass = execution_options_.mutable_debug_options()
                                    ->add_hlo_reduce_precision_options();
   *reduce_precision_pass = ReducePrecisionInsertion::make_options_proto(
-      HloReducePrecisionOptions::BEFORE_OP_FUSION, 5, 10,
+      HloReducePrecisionOptions::OP_OUTPUTS, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kAbs; });
 
   ComputeAndCompareR1(&builder, {0.0f}, {a_data.get()});
@@ -294,7 +294,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, ReducePrecisionSkippedAfterFusion) {
   auto reduce_precision_pass = execution_options_.mutable_debug_options()
                                    ->add_hlo_reduce_precision_options();
   *reduce_precision_pass = ReducePrecisionInsertion::make_options_proto(
-      HloReducePrecisionOptions::AFTER_OP_FUSION, 5, 10,
+      HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kAbs; });
 
   ComputeAndCompareR1(&builder, {-1.00001f}, {a_data.get()});
@@ -316,7 +316,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, ReducePrecisionAddedAfterFusion) {
   auto reduce_precision_pass = execution_options_.mutable_debug_options()
                                    ->add_hlo_reduce_precision_options();
   *reduce_precision_pass = ReducePrecisionInsertion::make_options_proto(
-      HloReducePrecisionOptions::AFTER_OP_FUSION, 5, 10,
+      HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kFusion; });
 
   ComputeAndCompareR1(&builder, {-1.0f}, {a_data.get()});
@@ -339,7 +339,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, ReducePrecisionSkippedFusionContains) {
   auto reduce_precision_pass = execution_options_.mutable_debug_options()
                                    ->add_hlo_reduce_precision_options();
   *reduce_precision_pass = ReducePrecisionInsertion::make_options_proto(
-      HloReducePrecisionOptions::FUSION_BY_CONTENT, 5, 10,
+      HloReducePrecisionOptions::FUSION_OUTPUTS_BY_CONTENT, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kCos; });
 
   ComputeAndCompareR1(&builder, {-1.00001f}, {a_data.get()});
@@ -362,7 +362,7 @@ XLA_TEST_F(ReducePrecisionInsertionTest, ReducePrecisionAddedFusionContains) {
   auto reduce_precision_pass = execution_options_.mutable_debug_options()
                                    ->add_hlo_reduce_precision_options();
   *reduce_precision_pass = ReducePrecisionInsertion::make_options_proto(
-      HloReducePrecisionOptions::FUSION_BY_CONTENT, 5, 10,
+      HloReducePrecisionOptions::FUSION_OUTPUTS_BY_CONTENT, 5, 10,
       [](const HloOpcode opcode) { return opcode == HloOpcode::kAbs; });
 
   ComputeAndCompareR1(&builder, {-1.0f}, {a_data.get()});
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index 6f62d77208..94575a292c 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -22,19 +22,28 @@ package xla;
 
 // Options for the HLO insert-reduce-precision-operations pass.
 message HloReducePrecisionOptions {
-  // When and how the pass will be run.
-  enum PassTiming {
-    // Pass runs before operation fusion or other optimization occurs,
-    // selecting operations to suffix according to their type.
-    BEFORE_OP_FUSION = 0;
-    // Pass runs after the operation-fusion pass, selecting operations to
-    // suffix according to their type.
-    AFTER_OP_FUSION = 1;
-    // Pass runs after the operation-fusion pass, and selects fusion operations
-    // to suffix according to their contents.
-    FUSION_BY_CONTENT = 2;
+  // Where and when the reduce-precision operations will be added.
+  enum Location {
+    // Add reduce-precision operations to the inputs of selected instructions.
+    // This is done before any optimization occurs.
+    OP_INPUTS = 0;
+    // Add reduce-precision operations to the outputs of selected instructions.
+    // This is done before any optimization occurs.
+    OP_OUTPUTS = 1;
+    // After operation-fusion occurs, add reduce-precision operations to the
+    // outputs of any selected instructions that have not been fused into
+    // fusion instructions.
+    UNFUSED_OP_OUTPUTS = 2;
+    // After operation-fusion occurs, add reduce-precision operations to the
+    // outputs of any fusion instructions that contain operations matching the
+    // selection criteria.
+    FUSION_INPUTS_BY_CONTENT = 3;
+    // After operation-fusion occurs, add reduce-precision operations to the
+    // outputs of any fusion instructions that contain operations matching the
+    // selection criteria.
+    FUSION_OUTPUTS_BY_CONTENT = 4;
   }
-  PassTiming pass_timing = 1;
+  Location location = 1;
 
   // Exponent and mantissa bit counts for the reduced precision.
   uint32 exponent_bits = 2;
-- 
GitLab


From 186164b32c77a6236a1e24ea04efdad4477c8ead Mon Sep 17 00:00:00 2001
From: Benoit Steiner 
Date: Mon, 28 Aug 2017 15:04:36 -0700
Subject: [PATCH 039/294] Turn on arithmetic optimization by default

PiperOrigin-RevId: 166763151
---
 tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc | 5 ++++-
 tensorflow/core/grappler/optimizers/meta_optimizer.cc       | 4 ++--
 tensorflow/python/framework/test_util.py                    | 2 ++
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index d6684d9e89..d5f7401785 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -138,7 +138,10 @@ bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const {
   if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) {
     return false;
   }
-  if (IsEnter(node) || IsPlaceholder(node)) {
+  if (IsEnter(node) || IsExit(node) || IsPlaceholder(node)) {
+    return false;
+  }
+  if (node.device().find("SPU") != string::npos) {
     return false;
   }
   const OpDef* op_def = nullptr;
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 13efc55902..4ac985b41b 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -66,7 +66,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       optimizers.push_back(
           std::unique_ptr(new ConstantFolding()));
     }
-    if (cfg_.arithmetic_optimization() == RewriterConfig::ON) {
+    if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
       optimizers.push_back(
           std::unique_ptr(new ArithmeticOptimizer()));
     }
@@ -138,7 +138,7 @@ void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item,
 bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
   return !cfg.disable_model_pruning() || cfg.optimize_tensor_layout() ||
          cfg.constant_folding() != RewriterConfig::OFF ||
-         cfg.arithmetic_optimization() == RewriterConfig::ON ||
+         cfg.arithmetic_optimization() != RewriterConfig::OFF ||
          cfg.auto_parallel().enable() || cfg.memory_optimization() > 1 ||
          !cfg.optimizers().empty();
 }
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 2ed69f4da7..be51a38daf 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -594,6 +594,8 @@ class TensorFlowTestCase(googletest.TestCase):
       config.graph_options.optimizer_options.opt_level = -1
       config.graph_options.rewrite_options.constant_folding = (
           rewriter_config_pb2.RewriterConfig.OFF)
+      config.graph_options.rewrite_options.arithmetic_optimization = (
+          rewriter_config_pb2.RewriterConfig.OFF)
       return config
 
     if graph is None:
-- 
GitLab


From f6376e04439eac6cf7c8f9c32276690b3f338763 Mon Sep 17 00:00:00 2001
From: Yao Zhang 
Date: Mon, 28 Aug 2017 15:06:32 -0700
Subject: [PATCH 040/294] Support multiple Enter nodes and map their frame
 names to frame ids.

PiperOrigin-RevId: 166763479
---
 tensorflow/core/grappler/utils/frame.cc      |  31 +++++-
 tensorflow/core/grappler/utils/frame_test.cc | 110 ++++++++++++++++++-
 2 files changed, 131 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/grappler/utils/frame.cc b/tensorflow/core/grappler/utils/frame.cc
index ff7be6f701..7655d0bee5 100644
--- a/tensorflow/core/grappler/utils/frame.cc
+++ b/tensorflow/core/grappler/utils/frame.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/utils/frame.h"
 #include 
 #include 
+#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
@@ -32,9 +33,10 @@ int IdentifyFrames(
     if (node.input_size() == 0) {
       std::vector empty;
       ready_nodes.emplace_back(&node, empty);
+      (*frames)[&node] = empty;
     }
   }
-  int frame_id = 0;
+  std::map name_to_id;
   while (!ready_nodes.empty()) {
     auto ready_node = ready_nodes.front();
     for (const auto& fanout : node_map.GetOutputs(ready_node.first->name())) {
@@ -44,18 +46,35 @@ int IdentifyFrames(
           frame_ids.pop_back();
         }
         if (IsEnter(*fanout)) {
-          frame_ids.push_back(frame_id);
-          frame_id++;
+          CHECK(fanout->attr().count("frame_name"))
+              << "Missing frame name for the Enter node " << fanout->name();
+          string name = fanout->attr().at("frame_name").s();
+          int id;
+          if (name_to_id.count(name)) {
+            id = name_to_id[name];
+          } else {
+            id = name_to_id.size();
+            name_to_id[name] = id;
+          }
+          frame_ids.push_back(id);
         }
         ready_nodes.emplace_back(fanout, frame_ids);
+        (*frames)[fanout] = frame_ids;
       } else {
-        CHECK(ready_node.second == (*frames)[fanout]);
+        auto frame_ids_fanout = (*frames)[fanout];
+        auto frame_ids_node = ready_node.second;
+        if (IsEnter(*fanout)) {
+          frame_ids_fanout.pop_back();
+        }
+        if (IsExit(*ready_node.first)) {
+          frame_ids_node.pop_back();
+        }
+        CHECK(frame_ids_node == frame_ids_fanout);
       }
     }
-    (*frames)[ready_node.first] = ready_node.second;
     ready_nodes.pop_front();
   }
-  return frame_id;
+  return name_to_id.size();
 }
 
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/utils/frame_test.cc b/tensorflow/core/grappler/utils/frame_test.cc
index e9b224ff4a..95ac857302 100644
--- a/tensorflow/core/grappler/utils/frame_test.cc
+++ b/tensorflow/core/grappler/utils/frame_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/utils/frame.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/platform/protobuf.h"
 #include "tensorflow/core/platform/test.h"
@@ -26,15 +27,25 @@ class IdentifyFramesTest : public ::testing::Test {
  protected:
   static NodeDef CreateNode(const string& name,
                             const std::vector& inputs) {
-    return CreateNode(name, "", inputs);
+    return CreateNode(name, "", "", inputs);
   }
   static NodeDef CreateNode(const string& name, const string& op,
                             const std::vector& inputs) {
+    return CreateNode(name, op, "", inputs);
+  }
+  static NodeDef CreateNode(const string& name, const string& op,
+                            const string& frame,
+                            const std::vector& inputs) {
     NodeDef node;
     node.set_name(name);
     if (!op.empty()) {
       node.set_op(op);
     }
+    if (!frame.empty()) {
+      AttrValue frame_name;
+      frame_name.set_s(frame);
+      node.mutable_attr()->insert({"frame_name", frame_name});
+    }
     for (const string& input : inputs) {
       node.add_input(input);
     }
@@ -42,17 +53,19 @@ class IdentifyFramesTest : public ::testing::Test {
   }
 };
 
-TEST_F(IdentifyFramesTest, WithLoop) {
+TEST_F(IdentifyFramesTest, NestedLoop) {
   GraphDef graph;
   // Create a two-level nested loop
   *graph.add_node() = CreateNode("0", {});
-  *graph.add_node() = CreateNode("1", "Enter", {"0"});
+  *graph.add_node() =
+      CreateNode("1", "Enter", "map/while/while_context1", {"0"});
   *graph.add_node() = CreateNode("2", {"1"});
   *graph.add_node() = CreateNode("3", "Merge", {"2", "14"});
   *graph.add_node() = CreateNode("4", {"3"});
   *graph.add_node() = CreateNode("5", "Switch", {"4"});
   *graph.add_node() = CreateNode("6", {"5"});
-  *graph.add_node() = CreateNode("7", "Enter", {"6"});
+  *graph.add_node() =
+      CreateNode("7", "Enter", "map/while/while_context2", {"6"});
   *graph.add_node() = CreateNode("8", {"7"});
   *graph.add_node() = CreateNode("9", "Merge", {"8", "12"});
   *graph.add_node() = CreateNode("10", {"9"});
@@ -73,9 +86,98 @@ TEST_F(IdentifyFramesTest, WithLoop) {
       {"12", {0, 1}}, {"13", {0, 1}}, {"14", {0}},    {"15", {0}},
       {"16", {0}},    {"17", {}}};
   EXPECT_EQ(num_frames, 2);
+  EXPECT_EQ(frames.size(), expected.size());
+  std::cout << "Number of frame: " << num_frames << std::endl;
+  for (const auto& node : frames) {
+    std::cout << node.first->name() << ": ";
+    EXPECT_EQ(node.second.size(), expected[node.first->name()].size());
+    for (int i = 0; i < node.second.size(); i++) {
+      EXPECT_EQ(expected[node.first->name()][i], node.second[i]);
+      std::cout << node.second[i] << " ";
+    }
+    std::cout << std::endl;
+  }
+}
+
+TEST_F(IdentifyFramesTest, MultipleInputsToEnter) {
+  GraphDef graph;
+  *graph.add_node() = CreateNode("0", {});
+  *graph.add_node() = CreateNode("1", {});
+  *graph.add_node() =
+      CreateNode("2", "Enter", "map/while/while_context", {"0", "1"});
+  *graph.add_node() = CreateNode("3", "Exit", {"2"});
+
+  std::unordered_map> frames;
+  int num_frames = IdentifyFrames(graph, &frames);
+  std::unordered_map> expected = {
+      {"0", {}}, {"1", {}}, {"2", {0}}, {"3", {0}}};
+  EXPECT_EQ(num_frames, 1);
+  EXPECT_EQ(frames.size(), expected.size());
+  std::cout << "Number of frame: " << num_frames << std::endl;
+  for (const auto& node : frames) {
+    std::cout << node.first->name() << ": ";
+    EXPECT_EQ(node.second.size(), expected[node.first->name()].size());
+    for (int i = 0; i < node.second.size(); i++) {
+      EXPECT_EQ(expected[node.first->name()][i], node.second[i]);
+      std::cout << node.second[i] << " ";
+    }
+    std::cout << std::endl;
+  }
+}
+
+TEST_F(IdentifyFramesTest, ExitOutput) {
+  GraphDef graph;
+  *graph.add_node() = CreateNode("0", {});
+  *graph.add_node() =
+      CreateNode("1", "Enter", "map/while/while_context", {"0"});
+  *graph.add_node() = CreateNode("2", "Exit", {"1"});
+  *graph.add_node() = CreateNode("3", {});
+  *graph.add_node() = CreateNode("4", {"2", "3"});
+
+  std::unordered_map> frames;
+  int num_frames = IdentifyFrames(graph, &frames);
+  std::unordered_map> expected = {
+      {"0", {}}, {"1", {0}}, {"2", {0}}, {"3", {}}, {"4", {}}};
+  EXPECT_EQ(num_frames, 1);
+  EXPECT_EQ(frames.size(), expected.size());
+  std::cout << "Number of frame: " << num_frames << std::endl;
+  for (const auto& node : frames) {
+    std::cout << node.first->name() << ": ";
+    EXPECT_EQ(node.second.size(), expected[node.first->name()].size());
+    for (int i = 0; i < node.second.size(); i++) {
+      EXPECT_EQ(expected[node.first->name()][i], node.second[i]);
+      std::cout << node.second[i] << " ";
+    }
+    std::cout << std::endl;
+  }
+}
+
+TEST_F(IdentifyFramesTest, MultipleEnterNodes) {
+  GraphDef graph;
+  *graph.add_node() = CreateNode("0", {});
+  string frame = "map/while/while_context";
+  *graph.add_node() = CreateNode("1", "Enter", frame, {"0"});
+  *graph.add_node() = CreateNode("2", {"1"});
+  *graph.add_node() = CreateNode("5", {});
+  *graph.add_node() = CreateNode("4", "Enter", frame, {"5"});
+  *graph.add_node() = CreateNode("3", {"4", "2"});
+  *graph.add_node() = CreateNode("4", {"5"});
+  *graph.add_node() = CreateNode("6", "Merge", {"3", "8"});
+  *graph.add_node() = CreateNode("7", "Switch", {"6"});
+  *graph.add_node() = CreateNode("8", "NextIteration", {"7"});
+  *graph.add_node() = CreateNode("9", "Exit", {"7"});
+
+  std::unordered_map> frames;
+  int num_frames = IdentifyFrames(graph, &frames);
+  std::unordered_map> expected = {
+      {"0", {}}, {"1", {0}}, {"2", {0}}, {"3", {0}}, {"4", {0}},
+      {"5", {}}, {"6", {0}}, {"7", {0}}, {"8", {0}}, {"9", {0}}};
+  EXPECT_EQ(num_frames, 1);
+  EXPECT_EQ(frames.size(), expected.size());
   std::cout << "Number of frame: " << num_frames << std::endl;
   for (const auto& node : frames) {
     std::cout << node.first->name() << ": ";
+    EXPECT_EQ(node.second.size(), expected[node.first->name()].size());
     for (int i = 0; i < node.second.size(); i++) {
       EXPECT_EQ(expected[node.first->name()][i], node.second[i]);
       std::cout << node.second[i] << " ";
-- 
GitLab


From ff31921368cdc6d8cdbb46f38d2914f664ad8447 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 15:11:26 -0700
Subject: [PATCH 041/294] Change libraries in tf2xla to be plain cc_libraries,
 not tf_kernel_library.

PiperOrigin-RevId: 166764293
---
 tensorflow/compiler/tf2xla/kernels/BUILD | 18 ++++++++++++------
 tensorflow/core/kernels/BUILD            |  6 ++++++
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index f5c3947009..d09e721c93 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -145,31 +145,35 @@ tf_kernel_library(
     ],
 )
 
-tf_kernel_library(
+cc_library(
     name = "gather_op_kernel_float_int32",
     srcs = ["gather_op_kernel_float_int32.cc"],
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/compiler/tf2xla:xla_local_runtime_context",
         "//tensorflow/core:framework_lite",
-        "//tensorflow/core/kernels:gather_functor",
+        "//tensorflow/core/kernels:bounds_check",
+        "//tensorflow/core/kernels:gather_functor_hdr",
         "//third_party/eigen3",
     ],
+    alwayslink = 1,
 )
 
-tf_kernel_library(
+cc_library(
     name = "gather_op_kernel_float_int64",
     srcs = ["gather_op_kernel_float_int64.cc"],
     visibility = ["//visibility:public"],
     deps = [
         "//tensorflow/compiler/tf2xla:xla_local_runtime_context",
         "//tensorflow/core:framework_lite",
-        "//tensorflow/core/kernels:gather_functor",
+        "//tensorflow/core/kernels:bounds_check",
+        "//tensorflow/core/kernels:gather_functor_hdr",
         "//third_party/eigen3",
     ],
+    alwayslink = 1,
 )
 
-tf_kernel_library(
+cc_library(
     name = "index_ops_kernel_argmax_float_1d",
     srcs = ["index_ops_kernel_argmax_float_1d.cc"],
     visibility = ["//visibility:public"],
@@ -177,9 +181,10 @@ tf_kernel_library(
         "//tensorflow/core:framework_lite",
         "//third_party/eigen3",
     ],
+    alwayslink = 1,
 )
 
-tf_kernel_library(
+cc_library(
     name = "index_ops_kernel_argmax_float_2d",
     srcs = ["index_ops_kernel_argmax_float_2d.cc"],
     visibility = ["//visibility:public"],
@@ -187,6 +192,7 @@ tf_kernel_library(
         "//tensorflow/core:framework_lite",
         "//third_party/eigen3",
     ],
+    alwayslink = 1,
 )
 
 # -----------------------------------------------------------------------------
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index ae8109b1b9..b024cefb34 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1085,6 +1085,12 @@ tf_kernel_library(
     ],
 )
 
+# Unlike gather_functor library, this does not include the CUDA code and deps.
+cc_library(
+    name = "gather_functor_hdr",
+    hdrs = ["gather_functor.h"],
+)
+
 tf_kernel_library(
     name = "dense_update_functor",
     srcs = ["dense_update_functor.cc"],
-- 
GitLab


From 843d318e5f5e38d8f1cdeff0343a60e47eff542f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 15:15:04 -0700
Subject: [PATCH 042/294] Add test to ensure that repeated flags are handled in
 a consistent way.

The tensorflow::Flags::Parse routine currently handles repeated flags by calling the hook function repeatedly for each instance of the flag.  This test allows us to rely on that behavior, rather than having it be an implementation detail that may change.

PiperOrigin-RevId: 166764798
---
 .../core/util/command_line_flags_test.cc      | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tensorflow/core/util/command_line_flags_test.cc b/tensorflow/core/util/command_line_flags_test.cc
index 6139c8e7bc..ad8c824461 100644
--- a/tensorflow/core/util/command_line_flags_test.cc
+++ b/tensorflow/core/util/command_line_flags_test.cc
@@ -224,6 +224,25 @@ TEST(CommandLineFlagsTest, FailedStringHook) {
   EXPECT_EQ(argc, 1);
 }
 
+TEST(CommandLineFlagsTest, RepeatedStringHook) {
+  int argc = 3;
+  std::vector argv_strings = {"program_name", "--some_name=this",
+                                      "--some_name=that"};
+  std::vector argv_array = CharPointerVectorFromStrings(argv_strings);
+  int call_count = 0;
+  bool parsed_ok = Flags::Parse(&argc, argv_array.data(),
+                                {Flag("some_name",
+                                      [&call_count](string value) {
+                                        call_count++;
+                                        return true;
+                                      },
+                                      "", "some name")});
+
+  EXPECT_EQ(true, parsed_ok);
+  EXPECT_EQ(argc, 1);
+  EXPECT_EQ(call_count, 2);
+}
+
 // Return whether str==pat, but allowing any whitespace in pat
 // to match zero or more whitespace characters in str.
 static bool MatchWithAnyWhitespace(const string &str, const string &pat) {
-- 
GitLab


From b802832b0db5fd0a2f466d703de9da2399e7c6f8 Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou 
Date: Mon, 28 Aug 2017 15:17:51 -0700
Subject: [PATCH 043/294] Support overriding asset path; Support saved model as
 input;

PiperOrigin-RevId: 166765212
---
 tensorflow/core/grappler/BUILD                |   3 +
 .../core/grappler/grappler_item_builder.cc    | 183 +++++++++++-------
 .../core/grappler/grappler_item_builder.h     |   2 +
 .../grappler/grappler_item_builder_test.cc    | 132 +++++++++++++
 4 files changed, 255 insertions(+), 65 deletions(-)

diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index 152d33fe05..7fd261bfd5 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -91,6 +91,7 @@ cc_library(
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler/inputs:utils",
@@ -116,11 +117,13 @@ cc_test(
     srcs = ["grappler_item_builder_test.cc"],
     deps = [
         ":grappler_item_builder",
+        "//google/protobuf:any",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:functional_ops",
         "//tensorflow/cc:grad_testutil",
         "//tensorflow/cc:gradients",
         "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc
index b740e8a999..33081061c0 100644
--- a/tensorflow/core/grappler/grappler_item_builder.cc
+++ b/tensorflow/core/grappler/grappler_item_builder.cc
@@ -37,6 +37,8 @@ limitations under the License.
 #include "tensorflow/core/grappler/inputs/utils.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/protobuf_internal.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
 #include "tensorflow/core/protobuf/saver.pb.h"
 #include "tensorflow/core/public/session_options.h"
@@ -163,6 +165,104 @@ std::unique_ptr GrapplerItemFromMetaGraphDef(
     return nullptr;
   }
 
+  // TODO(yuefengz): consider handling saved_model_main_op and legacy_init_op.
+  // The reason why they are difficult to handle is because they may not intend
+  // to initialize all variables that are required to run fetch nodes. We may
+  // have to run restore op first.
+
+  // Try to find initializers from variables and tables as init ops.
+  for (const string& var_collection :
+       {"variables", "local_variables", "model_variables",
+        "trainable_variables"}) {
+    if (meta_graph.collection_def().count(var_collection) == 0) {
+      continue;
+    }
+    const CollectionDef& vars = meta_graph.collection_def().at(var_collection);
+    for (const auto& raw_var : vars.bytes_list().value()) {
+      VariableDef var;
+      var.ParseFromString(raw_var);
+      if (!var.initializer_name().empty()) {
+        new_item->init_ops.push_back(NodeName(var.initializer_name()));
+      }
+    }
+  }
+
+  if (meta_graph.collection_def().count("table_initializer") > 0) {
+    const CollectionDef& inits =
+        meta_graph.collection_def().at("table_initializer");
+    if (inits.has_node_list()) {
+      for (const auto& node : inits.node_list().value()) {
+        new_item->init_ops.push_back(NodeName(node));
+        // Tables are initialized from files, which can take a long time. Add
+        // 30 minutes to the initialization time for each table to avoid
+        // timing out.
+        // TODO(bsteiner): adjust the timeout based on the file size.
+        new_item->expected_init_time += 30 * 60;
+      }
+    }
+  }
+
+  // We keep the mapping from asset node to asset files. This should have been
+  // used as feed but since asset node is usually a constant node, we will fill
+  // the values of these constant nodes with their actual asset file paths.
+  std::unordered_map asset_node_to_value;
+
+  // Assets file may have changed their directory, we assemble their new paths
+  // if assets_directory_override is set. We also make sure we still can
+  // access these asset files.
+  if (!cfg.assets_directory_override.empty()) {
+    if (meta_graph.collection_def().count("saved_model_assets") > 0) {
+      const CollectionDef& collection =
+          meta_graph.collection_def().at("saved_model_assets");
+      const auto& any_assets = collection.any_list().value();
+      for (const auto& any_asset : any_assets) {
+        AssetFileDef asset_file_def;
+        if (!ParseAny(any_asset, &asset_file_def, "tensorflow.AssetFileDef")
+                 .ok()) {
+          LOG(ERROR) << "Failed to parse AssetFile.";
+          continue;
+        }
+        string asset_filepath = io::JoinPath(cfg.assets_directory_override,
+                                             asset_file_def.filename());
+        if (!FilesExist({asset_filepath}, nullptr)) {
+          LOG(ERROR) << "Can't access one or more of the asset files "
+                     << asset_filepath << ", skipping this input";
+          return nullptr;
+        }
+        asset_node_to_value[NodeName(asset_file_def.tensor_info().name())] =
+            asset_filepath;
+      }
+    }
+  } else if (meta_graph.collection_def().count("asset_filepaths") > 0) {
+    const CollectionDef& file_paths =
+        meta_graph.collection_def().at("asset_filepaths");
+    std::vector paths;
+    for (const auto& raw_path : file_paths.bytes_list().value()) {
+      paths.push_back(raw_path);
+    }
+    if (!FilesExist(paths, nullptr)) {
+      LOG(ERROR) << "Can't access one or more of the asset files, skipping "
+                    "this input";
+      return nullptr;
+    }
+  }
+
+  if (meta_graph.collection_def().count("queue_runners") > 0) {
+    const CollectionDef& vars = meta_graph.collection_def().at("queue_runners");
+    for (const auto& raw : vars.bytes_list().value()) {
+      QueueRunnerDef queue_runner;
+      if (!queue_runner.ParseFromString(raw)) {
+        LOG(ERROR) << "Could not parse queue_runners, skipping this input";
+        return nullptr;
+      }
+      if (queue_runner.cancel_op_name().empty()) {
+        LOG(ERROR) << "Queue without a cancel op, skipping this input";
+        return nullptr;
+      }
+      new_item->queue_runners.push_back(queue_runner);
+    }
+  }
+
   for (auto& node : *new_item->graph.mutable_node()) {
     if (IsPlaceholder(node)) {
       if (node.attr().count("dtype") == 0) {
@@ -248,6 +348,24 @@ std::unique_ptr GrapplerItemFromMetaGraphDef(
       // inferring shapes and is a no-op when dynamically inferring shapes as
       // the Placeholder shape will match the shape passed from new_item->feed.
       *(node.mutable_attr()->at("shape").mutable_shape()) = shape_proto;
+    } else if (IsConstant(node)) {
+      auto it = asset_node_to_value.find(node.name());
+      if (it != asset_node_to_value.end()) {
+        auto iter = node.mutable_attr()->find("value");
+        if (iter == node.attr().end()) {
+          LOG(ERROR) << "Value attribute expected in const op for asset files";
+          return nullptr;
+        }
+        if (!iter->second.has_tensor() ||
+            iter->second.tensor().string_val_size() != 1) {
+          LOG(INFO) << "Unexected AttrValue proto: "
+                    << iter->second.DebugString();
+          return nullptr;
+        }
+        LOG(INFO) << "Using asset file " << it->second << " for node "
+                  << node.name();
+        *(iter->second.mutable_tensor()->mutable_string_val(0)) = it->second;
+      }
     }
 
     // Erase the recorded result of any previous shape inference to start again
@@ -268,71 +386,6 @@ std::unique_ptr GrapplerItemFromMetaGraphDef(
     }
   }
 
-  for (const string& var_collection :
-       {"variables", "local_variables", "model_variables",
-        "trainable_variables"}) {
-    if (meta_graph.collection_def().count(var_collection) == 0) {
-      continue;
-    }
-    const CollectionDef& vars = meta_graph.collection_def().at(var_collection);
-    for (const auto& raw_var : vars.bytes_list().value()) {
-      VariableDef var;
-      var.ParseFromString(raw_var);
-      if (!var.initializer_name().empty()) {
-        new_item->init_ops.push_back(var.initializer_name());
-      }
-    }
-  }
-
-  if (meta_graph.collection_def().count("table_initializer") > 0) {
-    const CollectionDef& inits =
-        meta_graph.collection_def().at("table_initializer");
-    if (inits.has_node_list()) {
-      for (const auto& node : inits.node_list().value()) {
-        new_item->init_ops.push_back(node);
-        // Tables are initialized from files, which can take a long time. Add 30
-        // minutes to the initialization time for each table to avoid timing
-        // out.
-        // TODO(bsteiner): adjust the timeout based on the file size.
-        new_item->expected_init_time += 30 * 60;
-      }
-    }
-  }
-
-  if (meta_graph.collection_def().count("queue_runners") > 0) {
-    const CollectionDef& vars = meta_graph.collection_def().at("queue_runners");
-    for (const auto& raw : vars.bytes_list().value()) {
-      QueueRunnerDef queue_runner;
-      if (!queue_runner.ParseFromString(raw)) {
-        LOG(ERROR) << "Could parse queue_runners, skipping this input";
-        return nullptr;
-      }
-      if (queue_runner.cancel_op_name().empty()) {
-        LOG(ERROR) << "Queue without a cancel op, skipping this input";
-        return nullptr;
-      }
-      new_item->queue_runners.push_back(queue_runner);
-    }
-  }
-
-  // Make sure we still can access the input files (aka "asset_filepaths") since
-  // these might have been moved or deleted, the cns cell might have been shut
-  // down, or we might be running as a user who does not have access to the
-  // files.
-  if (meta_graph.collection_def().count("asset_filepaths") > 0) {
-    const CollectionDef& file_paths =
-        meta_graph.collection_def().at("asset_filepaths");
-    std::vector paths;
-    for (const auto& raw_path : file_paths.bytes_list().value()) {
-      paths.push_back(raw_path);
-    }
-    if (!FilesExist(paths, nullptr)) {
-      LOG(ERROR)
-          << "Can't access one or more of the asset files, skipping this input";
-      return nullptr;
-    }
-  }
-
   if (meta_graph.collection_def().count("savers") > 0) {
     const CollectionDef& savers = meta_graph.collection_def().at("savers");
     for (const auto& raw : savers.bytes_list().value()) {
diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h
index d385a1916e..4ce5055e7a 100644
--- a/tensorflow/core/grappler/grappler_item_builder.h
+++ b/tensorflow/core/grappler/grappler_item_builder.h
@@ -45,6 +45,8 @@ struct ItemConfig {
   bool apply_optimizations;
   // If true, does inlining.
   bool inline_functions;
+  // If non-empty, override the directory of asset paths.
+  string assets_directory_override;
 };
 
 // Factory method for creating a GrapplerItem from a MetaGraphDef.
diff --git a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc
index 048870f9e5..4272179d3c 100644
--- a/tensorflow/core/grappler/grappler_item_builder_test.cc
+++ b/tensorflow/core/grappler/grappler_item_builder_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/grappler_item_builder.h"
+#include "google/protobuf/any.pb.h"
 #include "tensorflow/cc/framework/gradients.h"
 #include "tensorflow/cc/gradients/grad_testutil.h"
 #include "tensorflow/cc/ops/functional_ops.h"
@@ -22,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/io/path.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
 
@@ -121,6 +123,136 @@ TEST_F(GrapplerItemBuilderTest, SymbolicGradientInlining) {
             CountOpsWithNames(with_inline, ops_of_inline));
 }
 
+TEST_F(GrapplerItemBuilderTest, AssetFilepathOverrideTest) {
+  MetaGraphDef meta_graph;
+
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output var =
+      ops::Variable(s.WithOpName("var"), TensorShape(), DataType::DT_FLOAT);
+  Output filename_node =
+      ops::Const(s.WithOpName("filename"), string("model"), TensorShape());
+  Output tensor_name =
+      ops::Const(s.WithOpName("tensorname"), string("var"), TensorShape());
+  Output restore = ops::Restore(s.WithOpName("restore"), filename_node,
+                                tensor_name, DataType::DT_FLOAT);
+  Output assign = ops::Assign(s.WithOpName("assign"), var, restore);
+
+  TF_CHECK_OK(s.ToGraphDef(meta_graph.mutable_graph_def()));
+
+  string temp_dir = testing::TmpDir();
+
+  Env *env = Env::Default();
+  string filename =
+      io::JoinPath(temp_dir, "grappler_item_builder_test_filename");
+  env->DeleteFile(filename).IgnoreError();
+  std::unique_ptr file_to_write;
+  TF_CHECK_OK(env->NewWritableFile(filename, &file_to_write));
+  TF_CHECK_OK(file_to_write->Close());
+  TF_CHECK_OK(env->FileExists(filename));
+  LOG(INFO) << filename;
+
+  AssetFileDef asset_file_def;
+  *asset_file_def.mutable_tensor_info()->mutable_name() = "filename";
+  *asset_file_def.mutable_filename() = "grappler_item_builder_test_filename";
+
+  (*meta_graph.mutable_collection_def())["saved_model_assets"]
+      .mutable_any_list()
+      ->add_value()
+      ->PackFrom(asset_file_def);
+  *((*meta_graph.mutable_collection_def())["train_op"]
+        .mutable_node_list()
+        ->add_value()) = "assign";
+
+  ItemConfig cfg;
+  cfg.assets_directory_override = temp_dir;
+
+  std::unique_ptr item =
+      GrapplerItemFromMetaGraphDef("0", meta_graph, cfg);
+  ASSERT_TRUE(item != nullptr);
+  for (const NodeDef &node : item->graph.node()) {
+    if (node.name() == "filename") {
+      const auto iter = node.attr().find("value");
+      ASSERT_TRUE(iter != node.attr().end());
+      ASSERT_TRUE(iter->second.has_tensor());
+      ASSERT_EQ(1, iter->second.tensor().string_val_size());
+
+      string tensor_string_val = iter->second.tensor().string_val(0);
+      EXPECT_EQ(tensor_string_val, filename);
+    }
+  }
+}
+
+TEST_F(GrapplerItemBuilderTest, AssetFilepathOverrideTest_FileNotAccessible) {
+  MetaGraphDef meta_graph;
+
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output var =
+      ops::Variable(s.WithOpName("var"), TensorShape(), DataType::DT_FLOAT);
+  Output filename_node1 =
+      ops::Const(s.WithOpName("filename1"), string("model1"), TensorShape());
+  Output filename_node2 =
+      ops::Const(s.WithOpName("filename2"), string("model2"), TensorShape());
+  Output tensor_name =
+      ops::Const(s.WithOpName("tensorname"), string("var"), TensorShape());
+  Output restore1 = ops::Restore(s.WithOpName("restore1"), filename_node1,
+                                 tensor_name, DataType::DT_FLOAT);
+  Output restore2 = ops::Restore(s.WithOpName("restore2"), filename_node1,
+                                 tensor_name, DataType::DT_FLOAT);
+  Output assign1 = ops::Assign(s.WithOpName("assign1"), var, restore1);
+  Output assign2 = ops::Assign(s.WithOpName("assign2"), var, restore2);
+
+  TF_CHECK_OK(s.ToGraphDef(meta_graph.mutable_graph_def()));
+
+  string temp_dir = testing::TmpDir();
+
+  // Create the first AssetFileDef that has a valid file.
+  Env *env = Env::Default();
+  string filename1 =
+      io::JoinPath(temp_dir, "grappler_item_builder_test_filename1");
+  env->DeleteFile(filename1).IgnoreError();
+  std::unique_ptr file_to_write;
+  TF_CHECK_OK(env->NewWritableFile(filename1, &file_to_write));
+  TF_CHECK_OK(file_to_write->Close());
+  TF_CHECK_OK(env->FileExists(filename1));
+
+  AssetFileDef asset_file_def1;
+  *asset_file_def1.mutable_tensor_info()->mutable_name() = "filename1";
+  *asset_file_def1.mutable_filename() = "grappler_item_builder_test_filename1";
+
+  // Create the second AssetFileDef that has not a valid file.
+  string filename2 =
+      io::JoinPath(temp_dir, "grappler_item_builder_test_filename1");
+  env->DeleteFile(filename2).IgnoreError();
+  EXPECT_FALSE(env->FileExists(filename2).ok());
+
+  AssetFileDef asset_file_def2;
+  *asset_file_def2.mutable_tensor_info()->mutable_name() = "filename2";
+  *asset_file_def2.mutable_filename() = "grappler_item_builder_test_filename2";
+
+  (*meta_graph.mutable_collection_def())["saved_model_assets"]
+      .mutable_any_list()
+      ->add_value()
+      ->PackFrom(asset_file_def1);
+  (*meta_graph.mutable_collection_def())["saved_model_assets"]
+      .mutable_any_list()
+      ->add_value()
+      ->PackFrom(asset_file_def2);
+
+  *((*meta_graph.mutable_collection_def())["train_op"]
+        .mutable_node_list()
+        ->add_value()) = "assign1";
+  *((*meta_graph.mutable_collection_def())["train_op"]
+        .mutable_node_list()
+        ->add_value()) = "assign2";
+
+  ItemConfig cfg;
+  cfg.assets_directory_override = temp_dir;
+
+  std::unique_ptr item =
+      GrapplerItemFromMetaGraphDef("0", meta_graph, cfg);
+  ASSERT_TRUE(item == nullptr);
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From fa75ba9c8706919df836c569a0f6d4e60d8a01d6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 15:26:16 -0700
Subject: [PATCH 044/294] Add debug flag to disable expensive LLVM optimization
 passes.

RELNOTES: n/a
PiperOrigin-RevId: 166766323
---
 .../xla/legacy_flags/debug_options_flags.cc   |  8 +++
 .../xla/service/cpu/compiler_functor.cc       | 57 ++++++++++++++++++-
 .../xla/service/cpu/compiler_functor.h        |  3 +
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  3 +
 .../xla/service/cpu/simple_orc_jit.cc         | 14 +++--
 .../compiler/xla/service/cpu/simple_orc_jit.h |  4 +-
 tensorflow/compiler/xla/xla.proto             |  3 +
 7 files changed, 83 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
index 913b8650d2..34759dbc9b 100644
--- a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
@@ -35,6 +35,7 @@ void SetDebugOptionsDefaults(DebugOptions* flags) {
   flags->set_xla_llvm_enable_alias_scope_metadata(true);
   flags->set_xla_llvm_enable_noalias_metadata(true);
   flags->set_xla_llvm_enable_invariant_load_metadata(true);
+  flags->set_xla_llvm_disable_expensive_passes(false);
   flags->set_xla_backend_optimization_level(3);
   flags->set_xla_cpu_multi_thread_eigen(true);
   flags->set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
@@ -157,6 +158,13 @@ void AllocateFlags() {
            "In LLVM-based backends, enable the emission of "
            "!invariant.load metadata in "
            "the generated IR."),
+       tensorflow::Flag(
+           "xla_llvm_disable_expensive_passes",
+           bool_setter_for(
+               &DebugOptions::set_xla_llvm_disable_expensive_passes),
+           flag_values->xla_llvm_disable_expensive_passes(),
+           "In LLVM-based backends, disable a custom set of "
+           "expensive optimization passes."),
        tensorflow::Flag(
            "xla_backend_optimization_level",
            int32_setter_for(&DebugOptions::set_xla_backend_optimization_level),
diff --git a/tensorflow/compiler/xla/service/cpu/compiler_functor.cc b/tensorflow/compiler/xla/service/cpu/compiler_functor.cc
index 08eabd6682..141582c069 100644
--- a/tensorflow/compiler/xla/service/cpu/compiler_functor.cc
+++ b/tensorflow/compiler/xla/service/cpu/compiler_functor.cc
@@ -59,10 +59,63 @@ CompilerFunctor::AllIntrinsics() {
   return intrinsics;
 }
 
+/* Create filtered versions of the LLVM Pass Managers to filter out some
+of the expensive passes.
+Profiling:
+   learning/brain/google/xla/benchmarks:inception_cpu_benchmark
+   learning/brain/google/xla/benchmarks:cifarnet
+pointed to LICM and IndVarSimplify as the hottest passes.
+LICM is known to exhibit O(n^2) time in the number of instructions.
+IndVarSimplify is slow due to SCEV. If loops are emitted in canonical form,
+this pass is not necessary.
+Disabling these as a starting point.
+*/
+// TODO(b/64227304) Creating a custom pass pipeline will replace this.
+
+class FilteredFunctionPassManager : public llvm::legacy::FunctionPassManager {
+ public:
+  FilteredFunctionPassManager(llvm::Module* m, bool disable_expensive_passes)
+      : llvm::legacy::FunctionPassManager(m),
+        disable_expensive_passes_(disable_expensive_passes) {}
+  void add(llvm::Pass* p) override {
+    if (disable_expensive_passes_) {
+      llvm::StringRef PassName = p->getPassName();
+      if (PassName.contains("LICM") || PassName.contains("IndVarSimplify") ||
+          PassName.contains("LoopUnroll")) {
+        return;
+      }
+    }
+    llvm::legacy::FunctionPassManager::add(p);
+  }
+
+ private:
+  bool disable_expensive_passes_;
+};
+
+class FilteredPassManager : public llvm::legacy::PassManager {
+ public:
+  explicit FilteredPassManager(bool disable_expensive_passes)
+      : disable_expensive_passes_(disable_expensive_passes) {}
+  void add(llvm::Pass* p) override {
+    if (disable_expensive_passes_) {
+      llvm::StringRef PassName = p->getPassName();
+      if (PassName.contains("LICM") || PassName.contains("IndVarSimplify") ||
+          PassName.contains("LoopUnroll")) {
+        return;
+      }
+    }
+    llvm::legacy::PassManager::add(p);
+  }
+
+ private:
+  bool disable_expensive_passes_;
+};
+
 llvm::object::OwningBinary CompilerFunctor::
 operator()(llvm::Module& module) const {
-  llvm::legacy::PassManager module_passes;
-  llvm::legacy::FunctionPassManager function_passes(&module);
+  FilteredPassManager module_passes(disable_expensive_passes_);
+  FilteredFunctionPassManager function_passes(&module,
+                                              disable_expensive_passes_);
 
   VLOG(2) << "IR before optimizations";
   XLA_VLOG_LINES(2, llvm_ir::DumpModuleToString(module));
diff --git a/tensorflow/compiler/xla/service/cpu/compiler_functor.h b/tensorflow/compiler/xla/service/cpu/compiler_functor.h
index 7187f14c96..8cdd049e7b 100644
--- a/tensorflow/compiler/xla/service/cpu/compiler_functor.h
+++ b/tensorflow/compiler/xla/service/cpu/compiler_functor.h
@@ -44,6 +44,7 @@ class CompilerFunctor {
   explicit CompilerFunctor(
       llvm::TargetMachine* target_machine, const Disassembler* disassembler,
       int opt_level, bool optimize_for_size, bool enable_fast_math,
+      bool disable_expensive_passes,
       const VectorIntrinsics& available_intrinsics,
       LLVMCompiler::ModuleHook pre_optimization_hook = nullptr,
       LLVMCompiler::ModuleHook post_optimization_hook = nullptr)
@@ -52,6 +53,7 @@ class CompilerFunctor {
         opt_level_(opt_level),
         optimize_for_size_(optimize_for_size),
         enable_fast_math_(enable_fast_math),
+        disable_expensive_passes_(disable_expensive_passes),
         available_intrinsics_(available_intrinsics),
         pre_optimization_hook_(pre_optimization_hook),
         post_optimization_hook_(post_optimization_hook) {}
@@ -75,6 +77,7 @@ class CompilerFunctor {
   const unsigned opt_level_;
   const bool optimize_for_size_;
   const bool enable_fast_math_;
+  const bool disable_expensive_passes_;
   const VectorIntrinsics available_intrinsics_;
   LLVMCompiler::ModuleHook pre_optimization_hook_;
   LLVMCompiler::ModuleHook post_optimization_hook_;
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 1e331c1cb8..9c90c84d65 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -448,11 +448,13 @@ StatusOr> CpuCompiler::Compile(
   auto llvm_context = MakeUnique();
   auto llvm_module =
       MakeUnique("__compute_module", *llvm_context);
+
   auto jit = MakeUnique(
       CompilerTargetOptions(module->config()),
       CodeGenOptLevel(module->config()),
       options::OptimizeForSizeRequested(module->config()),
       module->config().debug_options().xla_enable_fast_math(),
+      module->config().debug_options().xla_llvm_disable_expensive_passes(),
       pre_optimization_ir_hook, post_optimization_ir_hook);
   llvm_module->setDataLayout(jit->data_layout());
   llvm_module->setTargetTriple(jit->target_triple().getTriple());
@@ -806,6 +808,7 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules,
         target_machine.get(), &disassembler, opt_level,
         options::OptimizeForSizeRequested(module->config()),
         module->config().debug_options().xla_enable_fast_math(),
+        module->config().debug_options().xla_llvm_disable_expensive_passes(),
         CompilerFunctor::AllIntrinsics(), pre_optimization_ir_dump_hook,
         post_optimization_ir_dump_hook);
     llvm::object::OwningBinary object_file =
diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index 573647534f..c3c11df090 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -176,6 +176,7 @@ CompilerFunctor::VectorIntrinsics GetAvailableIntrinsics() {
 SimpleOrcJIT::SimpleOrcJIT(const llvm::TargetOptions& target_options,
                            llvm::CodeGenOpt::Level opt_level,
                            bool optimize_for_size, bool enable_fast_math,
+                           bool disable_expensive_passes,
                            LLVMCompiler::ModuleHook pre_optimization_hook,
                            LLVMCompiler::ModuleHook post_optimization_hook)
     : target_machine_(
@@ -190,12 +191,13 @@ SimpleOrcJIT::SimpleOrcJIT(const llvm::TargetOptions& target_options,
       data_layout_(target_machine_->createDataLayout()),
       object_layer_(
           [] { return std::make_shared(); }),
-      compile_layer_(object_layer_,
-                     CompilerFunctor(target_machine_.get(), &disassembler_,
-                                     opt_level, optimize_for_size,
-                                     enable_fast_math, GetAvailableIntrinsics(),
-                                     std::move(pre_optimization_hook),
-                                     std::move(post_optimization_hook))) {
+      compile_layer_(
+          object_layer_,
+          CompilerFunctor(target_machine_.get(), &disassembler_, opt_level,
+                          optimize_for_size, enable_fast_math,
+                          disable_expensive_passes, GetAvailableIntrinsics(),
+                          std::move(pre_optimization_hook),
+                          std::move(post_optimization_hook))) {
   VLOG(1) << "CPU target: " << target_machine_->getTargetCPU().str()
           << " features: " << target_machine_->getTargetFeatureString().str();
 }
diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h
index 331e18bc8b..e476c0e381 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h
@@ -57,13 +57,15 @@ class SimpleOrcJIT {
   // generator.
   // The |optimize_for_size| parameter specifies that the code generator should
   // optimize to reduce code size, potentially at the cost of performance.
+  // The |disable_expensive_passes| parameter will disable certain optimization
+  // passes
   // The |pre_optimization_hook| is invoked on the module before any IR
   // level optimizations are applied.
   // The |post_optimization_hook| is invoked on the module after all IR
   // level optimizations are applied.
   SimpleOrcJIT(const llvm::TargetOptions& target_options,
                llvm::CodeGenOpt::Level opt_level, bool optimize_for_size,
-               bool enable_fast_math,
+               bool enable_fast_math, bool disable_expensive_passes,
                LLVMCompiler::ModuleHook pre_optimization_hook,
                LLVMCompiler::ModuleHook post_optimization_hook);
 
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index 94575a292c..4840ddb881 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -147,6 +147,9 @@ message DebugOptions {
   // the generated IR.
   bool xla_llvm_enable_invariant_load_metadata = 72;
 
+  // If true, a set of expensive LLVM optimization passes will not be run.
+  bool xla_llvm_disable_expensive_passes = 73;
+
   // Options for inserting reduce-precision operations for numerical
   // experimentation.  This is a repeated field, as we may want to have
   // multiple passes with different parameters.
-- 
GitLab


From 0cff60ebb29f5aba5092988c8b7f13c258115e81 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 15:47:54 -0700
Subject: [PATCH 045/294] Adding a function to compute the natural log of the
 determinant for hermitian positive definite matrices in a numerically stable
 way via Cholesky decompositions. Based on draft by Jasper Snoek,
 jsnoek@google.com

This is also the first example of adding a new op in the tf.linalg namespace.

PiperOrigin-RevId: 166769461
---
 tensorflow/python/BUILD                       |  5 +-
 tensorflow/python/kernel_tests/BUILD          |  1 +
 .../python/kernel_tests/linalg_ops_test.py    | 44 +++++++--
 tensorflow/python/ops/linalg_impl.py          | 56 +++++++++++
 tensorflow/python/ops/linalg_ns.py            | 15 ++-
 .../tools/api/golden/tensorflow.linalg.pbtxt  | 96 +------------------
 6 files changed, 115 insertions(+), 102 deletions(-)
 create mode 100644 tensorflow/python/ops/linalg_impl.py

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index fa2bce843a..28c3eef57b 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1632,7 +1632,10 @@ py_library(
 
 py_library(
     name = "linalg_ns",
-    srcs = ["ops/linalg_ns.py"],
+    srcs = [
+        "ops/linalg_impl.py",
+        "ops/linalg_ns.py",
+    ],
     srcs_version = "PY2AND3",
     deps = [
         ":array_ops",
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 3e2cb82d02..2db686333d 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1479,6 +1479,7 @@ cuda_py_test(
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:linalg_ops",
+        "//tensorflow/python:linalg_ns",
         "//tensorflow/python:math_ops",
     ],
     tags = ["no_windows_gpu"],
diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py
index 0e096bbc09..c198e13f84 100644
--- a/tensorflow/python/kernel_tests/linalg_ops_test.py
+++ b/tensorflow/python/kernel_tests/linalg_ops_test.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import linalg_ns as linalg
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
@@ -34,10 +35,12 @@ def _AddTest(test_class, op_name, testcase_name, fn):
   setattr(test_class, test_name, fn)
 
 
-def _RandomPDMatrix(n, rng):
+def _RandomPDMatrix(n, rng, dtype=np.float64):
   """Random positive definite matrix."""
-  temp = rng.randn(n, n)
-  return temp.dot(temp.T)
+  temp = rng.randn(n, n).astype(dtype)
+  if dtype in [np.complex64, np.complex128]:
+    temp.imag = rng.randn(n, n)
+  return np.conj(temp).dot(temp.T)
 
 
 class CholeskySolveTest(test.TestCase):
@@ -46,9 +49,9 @@ class CholeskySolveTest(test.TestCase):
     self.rng = np.random.RandomState(0)
 
   def test_works_with_five_different_random_pos_def_matrices(self):
-    with self.test_session(use_gpu=True):
-      for n in range(1, 6):
-        for np_type, atol in [(np.float32, 0.05), (np.float64, 1e-5)]:
+    for n in range(1, 6):
+      for np_type, atol in [(np.float32, 0.05), (np.float64, 1e-5)]:
+        with self.test_session(use_gpu=True):
           # Create 2 x n x n matrix
           array = np.array(
               [_RandomPDMatrix(n, self.rng),
@@ -61,6 +64,35 @@ class CholeskySolveTest(test.TestCase):
                 rhs, math_ops.matmul(array, x).eval(), atol=atol)
 
 
+class LogdetTest(test.TestCase):
+
+  def setUp(self):
+    self.rng = np.random.RandomState(42)
+
+  def test_works_with_five_different_random_pos_def_matrices(self):
+    for n in range(1, 6):
+      for np_dtype, atol in [(np.float32, 0.05), (np.float64, 1e-5),
+                             (np.complex64, 0.05), (np.complex128, 1e-5)]:
+        matrix = _RandomPDMatrix(n, self.rng, np_dtype)
+        _, logdet_np = np.linalg.slogdet(matrix)
+        with self.test_session(use_gpu=True):
+          # Create 2 x n x n matrix
+          # matrix = np.array(
+          #     [_RandomPDMatrix(n, self.rng, np_dtype),
+          #      _RandomPDMatrix(n, self.rng, np_dtype)]).astype(np_dtype)
+          logdet_tf = linalg.logdet(matrix)
+          self.assertAllClose(logdet_np, logdet_tf.eval(), atol=atol)
+
+  def test_works_with_underflow_case(self):
+    for np_dtype, atol in [(np.float32, 0.05), (np.float64, 1e-5),
+                           (np.complex64, 0.05), (np.complex128, 1e-5)]:
+      matrix = (np.eye(20) * 1e-6).astype(np_dtype)
+      _, logdet_np = np.linalg.slogdet(matrix)
+      with self.test_session(use_gpu=True):
+        logdet_tf = linalg.logdet(matrix)
+        self.assertAllClose(logdet_np, logdet_tf.eval(), atol=atol)
+
+
 class EyeTest(test.TestCase):
   pass  # Will be filled in below
 
diff --git a/tensorflow/python/ops/linalg_impl.py b/tensorflow/python/ops/linalg_impl.py
new file mode 100644
index 0000000000..ca57653d14
--- /dev/null
+++ b/tensorflow/python/ops/linalg_impl.py
@@ -0,0 +1,56 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Operations for linear algebra."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_linalg_ops
+from tensorflow.python.ops import math_ops
+
+
+def logdet(matrix, name=None):
+  """Computes log of the determinant of a hermitian positive definite matrix.
+
+  ```python
+  # Compute the determinant of a matrix while reducing the chance of over- or
+  underflow:
+  A = ... # shape 10 x 10
+  det = tf.exp(tf.logdet(A))  # scalar
+  ```
+
+  Args:
+    matrix:  A `Tensor`. Must be `float32`, `float64`, `complex64`, or
+      `complex128` with shape `[..., M, M]`.
+    name:  A name to give this `Op`.  Defaults to `logdet`.
+
+  Returns:
+    The natural log of the determinant of `matrix`.
+
+  @compatibility(numpy)
+  Equivalent to numpy.linalg.slogdet, although no sign is returned since only
+  hermitian positive definite matrices are supported.
+  @end_compatibility
+  """
+  # This uses the property that the log det(A) = 2*sum(log(real(diag(C))))
+  # where C is the cholesky decomposition of A.
+  with ops.name_scope(name, 'logdet', [matrix]):
+    chol = gen_linalg_ops.cholesky(matrix)
+    return 2.0 * math_ops.reduce_sum(
+        math_ops.log(math_ops.real(array_ops.matrix_diag_part(chol))),
+        reduction_indices=[-1])
diff --git a/tensorflow/python/ops/linalg_ns.py b/tensorflow/python/ops/linalg_ns.py
index 6451f91c72..ccd7e452a0 100644
--- a/tensorflow/python/ops/linalg_ns.py
+++ b/tensorflow/python/ops/linalg_ns.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Public API for tf.linalg namespace."""
+"""Public API for tf.linalg namespace.
+
+@@logdet
+"""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -23,6 +26,12 @@ from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import special_math_ops
 
+# go/tf-wildcard-import
+# pylint: disable=wildcard-import,unused-import
+from tensorflow.python.ops.linalg_impl import *
+# pylint: enable=wildcard-import
+from tensorflow.python.util.all_util import remove_undocumented
+
 # Linear algebra ops.
 band_part = array_ops.matrix_band_part
 cholesky = linalg_ops.cholesky
@@ -60,3 +69,7 @@ del linalg_ops
 del math_ops
 del print_function
 del special_math_ops
+
+_allowed_symbols = []
+
+remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
index 8d726ce8f1..4dc2e8e67d 100644
--- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt
@@ -1,99 +1,7 @@
 path: "tensorflow.linalg"
 tf_module {
   member_method {
-    name: "band_part"
-    argspec: "args=[\'input\', \'num_lower\', \'num_upper\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "cholesky"
-    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "cholesky_solve"
-    argspec: "args=[\'chol\', \'rhs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "det"
-    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "diag"
-    argspec: "args=[\'diagonal\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "diag_part"
-    argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "eigh"
-    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "eigvalsh"
-    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "einsum"
-    argspec: "args=[\'equation\'], varargs=inputs, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "eye"
-    argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"\", \'None\'], "
-  }
-  member_method {
-    name: "inv"
-    argspec: "args=[\'input\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "lstsq"
-    argspec: "args=[\'matrix\', \'rhs\', \'l2_regularizer\', \'fast\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'True\', \'None\'], "
-  }
-  member_method {
-    name: "norm"
-    argspec: "args=[\'tensor\', \'ord\', \'axis\', \'keep_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'euclidean\', \'None\', \'False\', \'None\'], "
-  }
-  member_method {
-    name: "qr"
-    argspec: "args=[\'input\', \'full_matrices\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "self_adjoint_eig"
-    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "self_adjoint_eigvals"
-    argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "set_diag"
-    argspec: "args=[\'input\', \'diagonal\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "solve"
-    argspec: "args=[\'matrix\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
-  }
-  member_method {
-    name: "solve_ls"
-    argspec: "args=[\'matrix\', \'rhs\', \'l2_regularizer\', \'fast\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'True\', \'None\'], "
-  }
-  member_method {
-    name: "svd"
-    argspec: "args=[\'tensor\', \'full_matrices\', \'compute_uv\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'None\'], "
-  }
-  member_method {
-    name: "tensordot"
-    argspec: "args=[\'a\', \'b\', \'axes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "trace"
-    argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "transpose"
-    argspec: "args=[\'a\', \'name\'], varargs=None, keywords=None, defaults=[\'matrix_transpose\'], "
-  }
-  member_method {
-    name: "triangular_solve"
-    argspec: "args=[\'matrix\', \'rhs\', \'lower\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'False\', \'None\'], "
+    name: "logdet"
+    argspec: "args=[\'matrix\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
 }
-- 
GitLab


From bda0611e6251d1d065a3bb36f36f70fa6083b28a Mon Sep 17 00:00:00 2001
From: Benoit Steiner 
Date: Mon, 28 Aug 2017 15:52:09 -0700
Subject: [PATCH 046/294] Added a sanity check to make it easier to debug graph
 corruptions

PiperOrigin-RevId: 166769953
---
 tensorflow/core/grappler/utils.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index e2f722a8b1..948df18879 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -27,7 +27,9 @@ namespace grappler {
 NodeMap::NodeMap(GraphDef* graph) : graph_(graph) {
   for (int i = 0; i < graph_->node_size(); i++) {
     auto node = graph_->mutable_node(i);
-    nodes_.insert(std::make_pair(node->name(), node));
+    auto rslt = nodes_.insert(std::make_pair(node->name(), node));
+    // Check that the graph doesn't contain multiple nodes with the same name.
+    CHECK(rslt.second);
     for (const auto& input : node->input()) {
       outputs_[NodeName(input)].insert(nodes_[node->name()]);
     }
-- 
GitLab


From 5a1d6d9dac79b46f055462ee52125753524d9f6e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 16:06:11 -0700
Subject: [PATCH 047/294] Implement Sum and Mean math gradients.

PiperOrigin-RevId: 166771825
---
 tensorflow/cc/BUILD                       |   1 +
 tensorflow/cc/gradients/math_grad.cc      | 138 ++++++++++++++++++++++
 tensorflow/cc/gradients/math_grad_test.cc |  18 +++
 3 files changed, 157 insertions(+)

diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index 369adee086..aa01767a41 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -276,6 +276,7 @@ cc_library(
         ":cc_ops",
         ":cc_ops_internal",
         ":grad_op_registry",
+        ":gradients",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc
index 77a82fc8ca..09a15fbe5f 100644
--- a/tensorflow/cc/gradients/math_grad.cc
+++ b/tensorflow/cc/gradients/math_grad.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/cc/ops/standard_ops.h"
 
 #include "tensorflow/cc/framework/grad_op_registry.h"
+#include "tensorflow/cc/framework/gradients.h"
 
 namespace tensorflow {
 namespace ops {
@@ -549,6 +550,143 @@ Status ConjGrad(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("Conj", ConjGrad);
 
+// Integer division x / y, assuming x and y >=0, but treats x/0 = x
+Output SafeDivHelper(const Scope& scope, const Output& x, const Output& y) {
+  return Div(scope, x, Maximum(scope, y, Const(scope, 1)));
+}
+
+// Helper function for reduction ops.
+//
+// input_shape: 1-D Tensor, the shape of the Tensor being reduced.
+// axes: 1-D Tensor, the reduction axes.
+//   Note that the reduction indices are in the range
+//   -rank(input_shape), rank(input_shape)
+// returns a 1-D Tensor, the output shape as if keep_dims were set to True.
+Output ReducedShapeHelper(const Scope& scope, const Output& input_shape,
+                          const Output& reduction_axes) {
+  auto zero = Const(scope, 0);
+  auto one = Const(scope, 1);
+
+  // Running example in comments
+  // input_shape = [2, 3, 5, 7]
+  // axes = [1, 2]
+  // The result (a shape after a reduction with keep_dims=True)
+  // [2, 1, 1, 7]
+  //
+  // We can treat each entry in axes as an index into input_shape that
+  // should be replaced by 1.
+  // We use DynamicStitch to do this.
+
+  // input_rank = 4
+  auto input_rank = Size(scope, input_shape);
+
+  // Normalize any negative indices in the reduction_axes to positive
+  // values.
+  auto axes = Mod(scope, Add(scope, reduction_axes, input_rank), input_rank);
+
+  // This [0..input_rank) range of integers is used in DynamicStitch to
+  // first copy input_shape to the result.
+  // input_rank_range = [0, 1, 2, 3]
+  auto input_rank_range = Range(scope, zero, input_rank, one);
+
+  // A 1-filled tensor with the same shape as axes. DynamicStitch will
+  // merge these 1s (using axes for indices) to the correct
+  // position in the result.
+  // axes_ones = [1, 1]
+  auto axes_ones = OnesLike(scope, axes);
+
+  // using DynamicStitch:
+  // indices = { input_rank_range, axes }
+  //         = { [0, 1, 2, 3], [1, 2] }
+  // data = { input_shape, axes_ones }
+  //      = { [2, 3, 5, 7], [1, 1] }
+  // The input_rank_range entry in indices first replicates the
+  // input_shape to the result.
+  // The axes entry in indices then moves a 1 to each of its entries,
+  // resulting in
+  // [2, 1, 1, 7]
+  std::vector indices = {input_rank_range, axes};
+  std::vector data = {input_shape, axes_ones};
+  return DynamicStitch(scope, indices, data);
+}
+
+// SumGradHelper returns the gradient for the Sum operator, and is used
+// by SumGrad and MeanGrad.
+Output SumGradHelper(const Scope& scope, const Operation& op,
+                     const std::vector& grad_inputs) {
+  // The partial derivative for any input along a "reduced" dimension
+  // is just 1, so we only need replicate the output gradient on such a
+  // dimension to its "expanded" shape.
+  // Running example:
+  // input is
+  // [[a, b, c],
+  //  [d, e, f]]
+  // reduction_indices = [1]
+  // Sum = [a + b + c, d + e + f]
+  // if the gradient is [g1, g2]
+  // We want the propagated gradient to be
+  // [[g1, g1, g1],
+  //  [g2, g2, g2]]
+
+  // input_shape = [2, 3]
+  auto input_shape = Shape(scope, op.input(0));
+
+  // output_shape_kept_dims = [2, 1]
+  auto output_shape_kept_dims =
+      ReducedShapeHelper(scope, input_shape, op.input(1));
+
+  // This step "flips" any 1s with values from the input_shape, and
+  // replaces remaining entries with 1. This creates a shape that
+  // shows how much each dimension in the incoming gradient should be
+  // replicated.
+  // tile_scaling = [1, 3]
+  auto tile_scaling = SafeDivHelper(scope, input_shape, output_shape_kept_dims);
+
+  // grad = [[g1], [g2]]
+  auto grad = Reshape(scope, grad_inputs[0], output_shape_kept_dims);
+
+  // tile(grad, tile_scaling) = [[g1, g1, g1], [g2, g2, g2]]
+  return Tile(scope, grad, tile_scaling);
+}
+
+Status SumGrad(const Scope& scope, const Operation& op,
+               const std::vector& grad_inputs,
+               std::vector* grad_outputs) {
+  grad_outputs->push_back(SumGradHelper(scope, op, grad_inputs));
+
+  // Stop propagation along reduction_indices
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("Sum", SumGrad);
+
+Status MeanGrad(const Scope& scope, const Operation& op,
+                const std::vector& grad_inputs,
+                std::vector* grad_outputs) {
+  // The Mean gradient is just like the Sum gradient, except that
+  // all gradients are also divided by the size of reduced groups.
+  auto sum_grad = SumGradHelper(scope, op, grad_inputs);
+
+  // The product of all entries in a tensor's shape is the total
+  // number of entries in the tensor. This step calculates
+  // n_input_entries/n_output_entries
+  // = group_size
+  auto input_shape = Shape(scope, op.input(0));
+  auto output_shape = Shape(scope, op.output(0));
+  auto zero = Const(scope, 0);
+  auto group_size = SafeDivHelper(scope, Prod(scope, input_shape, zero),
+                                  Prod(scope, output_shape, zero));
+
+  // propagate sum_grad/group_size
+  grad_outputs->push_back(
+      Div(scope, sum_grad, Cast(scope, group_size, sum_grad.type())));
+
+  // Stop propagation along reduction_indices
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("Mean", MeanGrad);
+
 // MatMulGrad helper function used to compute two MatMul operations
 // based on input matrix transposition combinations.
 Status MatMulGradHelper(const Scope& scope, const bool is_batch,
diff --git a/tensorflow/cc/gradients/math_grad_test.cc b/tensorflow/cc/gradients/math_grad_test.cc
index 45060c33f3..62b59b25c7 100644
--- a/tensorflow/cc/gradients/math_grad_test.cc
+++ b/tensorflow/cc/gradients/math_grad_test.cc
@@ -937,6 +937,24 @@ class NaryGradTest : public ::testing::Test {
   Scope scope_;
 };
 
+TEST_F(NaryGradTest, Sum) {
+  TensorShape x_shape({2, 3, 5, 7});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
+  auto y = Sum(scope_, x, {1, -1});
+  // y's shape is the result of reducing x along axes 1 and -1 (= 3)
+  TensorShape y_shape({2, 5});
+  RunTest({x}, {x_shape}, {y}, {y_shape});
+}
+
+TEST_F(NaryGradTest, Mean) {
+  TensorShape x_shape({2, 3, 5, 7});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
+  auto y = Mean(scope_, x, {1, -1});
+  // y's shape is the result of reducing x along axes 1 and -1 (= 3)
+  TensorShape y_shape({2, 5});
+  RunTest({x}, {x_shape}, {y}, {y_shape});
+}
+
 TEST_F(NaryGradTest, AddN) {
   TensorShape shape({3, 2, 5});
   std::vector xs;
-- 
GitLab


From 51eb71cd3b8353ba65ca3539539f4b69fda029dc Mon Sep 17 00:00:00 2001
From: meijun 
Date: Tue, 29 Aug 2017 07:42:09 +0800
Subject: [PATCH 048/294] fix markdown syntax mistakes

---
 tensorflow/core/profiler/g3doc/advise.md             |  4 ++--
 tensorflow/core/profiler/g3doc/command_line.md       |  6 +++---
 tensorflow/core/profiler/g3doc/options.md            |  8 ++++----
 tensorflow/core/profiler/g3doc/profile_memory.md     |  2 +-
 .../profiler/g3doc/profile_model_architecture.md     |  8 ++++----
 tensorflow/core/profiler/g3doc/profile_time.md       | 12 ++++++------
 6 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/tensorflow/core/profiler/g3doc/advise.md b/tensorflow/core/profiler/g3doc/advise.md
index d87b0d8603..d0de8317f6 100644
--- a/tensorflow/core/profiler/g3doc/advise.md
+++ b/tensorflow/core/profiler/g3doc/advise.md
@@ -86,7 +86,7 @@ For example:
 *   Checks RecvTensor RPC latency and bandwidth.
 *   Checks CPU/Memory utilization of the job.
 
-####AcceleratorUtilization Checker
+#### AcceleratorUtilization Checker
 * Checks what percentage of time the accelerator spends on computation.
 
 #### OperationChecker
@@ -100,7 +100,7 @@ For example:
 *   Checks the most expensive graph nodes.
 *   Checks the most expensive graph-building Python codes.
 
-####Contribute Your Checker
+#### Contribute Your Checker
 
 Follow examples of accelerator_utilization_checker.h
 
diff --git a/tensorflow/core/profiler/g3doc/command_line.md b/tensorflow/core/profiler/g3doc/command_line.md
index 857b5e6459..e2839a682f 100644
--- a/tensorflow/core/profiler/g3doc/command_line.md
+++ b/tensorflow/core/profiler/g3doc/command_line.md
@@ -51,7 +51,7 @@ It defines _checkpoint_variable op type. It also provides checkpointed tensors'
 Note: this feature is not well maintained now.
 
 
-###Start `tfprof`
+### Start `tfprof`
 
 #### Build `tfprof`
 
@@ -140,9 +140,9 @@ tfprof>
 -output
 ```
 
-###Examples
+### Examples
 
-####Profile Python Time
+#### Profile Python Time
 ```shell
 # Requires --graph_path --op_log_path
 tfprof> code -max_depth 1000 -show_name_regexes .*model_analyzer.*py.* -select micros -account_type_regexes .* -order_by micros
diff --git a/tensorflow/core/profiler/g3doc/options.md b/tensorflow/core/profiler/g3doc/options.md
index 15712d04c2..ddee63ad42 100644
--- a/tensorflow/core/profiler/g3doc/options.md
+++ b/tensorflow/core/profiler/g3doc/options.md
@@ -1,6 +1,6 @@
-##Options
+## Options
 
-###Overview
+### Overview
 
 For all tfprof views, the profiles are processed with the following procedures
 
@@ -35,7 +35,7 @@ For all tfprof views, the profiles are processed with the following procedures
 4) Finally, the filtered data structure is output in a format depending
    on the `-output` option.
 
-####Option Semantics In Different View
+#### Option Semantics In Different View
 options usually have the same semantics in different views. However, some
 can vary. For example `-max_depth` in scope view means the depth of
 name scope tree. In op view, it means the length of operation list.
@@ -68,7 +68,7 @@ output_bytes: The memory output by the operation. It's not necessarily requested
               by the current operation. For example, it can be a tensor
               forwarded from input to output, with in-place mutation.
 
-###Docs
+### Docs
 
 `-max_depth`: Show nodes that are at most this number of hops from starting node in the data structure.
 
diff --git a/tensorflow/core/profiler/g3doc/profile_memory.md b/tensorflow/core/profiler/g3doc/profile_memory.md
index a00683d062..6eda5abdd9 100644
--- a/tensorflow/core/profiler/g3doc/profile_memory.md
+++ b/tensorflow/core/profiler/g3doc/profile_memory.md
@@ -1,4 +1,4 @@
-##Profile Memory
+## Profile Memory
 
 It is generally a good idea to visualize the memory usage in timeline.
 It allows you to see the memory consumption of each GPU over time.
diff --git a/tensorflow/core/profiler/g3doc/profile_model_architecture.md b/tensorflow/core/profiler/g3doc/profile_model_architecture.md
index a42b2e918d..61bb66bd21 100644
--- a/tensorflow/core/profiler/g3doc/profile_model_architecture.md
+++ b/tensorflow/core/profiler/g3doc/profile_model_architecture.md
@@ -1,9 +1,9 @@
-##Profile Model Architecture
+## Profile Model Architecture
 
 * [Profile Model Parameters](#profile-model-parameters)
 * [Profile Model Float Operations](#profile-model-float-operations)
 
-###Profile Model Parameters
+### Profile Model Parameters
 
 Notes:
 `VariableV2` operation type might contain variables created by TensorFlow
@@ -39,9 +39,9 @@ param_stats = tf.profiler.profile(
 sys.stdout.write('total_params: %d\n' % param_stats.total_parameters)
 ```
 
-###Profile Model Float Operations
+### Profile Model Float Operations
 
-####Caveats
+#### Caveats
 
 For an operation to have float operation statistics:
 
diff --git a/tensorflow/core/profiler/g3doc/profile_time.md b/tensorflow/core/profiler/g3doc/profile_time.md
index e11a75553b..4aafc697a9 100644
--- a/tensorflow/core/profiler/g3doc/profile_time.md
+++ b/tensorflow/core/profiler/g3doc/profile_time.md
@@ -1,4 +1,4 @@
-##Profile Time
+## Profile Time
 
 * [Times in TensorFlow and tfprof](#times-in-tensorflow-and-tfprof)
 * [Profile by Python Code](#profile-by-python-code)
@@ -7,7 +7,7 @@
 * [Profile by Name Scope](#profile-by-name-scope)
 
 
-###Times in TensorFlow and tfprof
+### Times in TensorFlow and tfprof
 When we run a model, Tensorflow schedules and runs the nodes (operations)
 in the graph. An operation can be placed on an accelerator or on CPU.
 
@@ -37,7 +37,7 @@ When an operation is placed on CPU, it will completely run on CPU. Hence,
 should be 0.
 
 
-###Profile by Python Code
+### Profile by Python Code
 ```python
 # In code view, the time of each line of Python code is the aggregated
 # times of all operations created by that line.
@@ -112,7 +112,7 @@ Set ```-output timeline:outfile=``` to generate timeline instead of st
 
 
 
-###Profile by Operation Type
+### Profile by Operation Type
 ```python
 # In op view, you can view the aggregated time of each operation type.
 tfprof> op -select micros,occurrence -order_by micros
@@ -138,7 +138,7 @@ MatMul                        618.97ms (63.56%, 16.51%), |/job:worker/replica:0/
 ```
 
 
-###Profile by Graph
+### Profile by Graph
 
 Usually, use graph view to generate a timeline to visualize the result.
 
@@ -163,7 +163,7 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file.
 ******************************************************
 ```
 
-###Profile by Name Scope
+### Profile by Name Scope
 
 Usually scope view allows you to pin point the problematic places if you
 have properly named your operations with tf.name_scope or tf.variable_scope.
-- 
GitLab


From 1acf8958429ed9a360dc07d2d63661a4c606c3ba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 18:49:58 -0700
Subject: [PATCH 049/294] Tweak profiler for the UI 1. Support parsing text
 format graph from python 2. Remove line-snippet for code view (too verbose)

PiperOrigin-RevId: 166790284
---
 tensorflow/core/grappler/BUILD                 |  1 -
 .../profiler/internal/print_model_analysis.cc  |  6 +++++-
 .../core/profiler/internal/tfprof_code.cc      |  5 -----
 .../core/profiler/internal/tfprof_stats.cc     |  2 +-
 .../python/profiler/model_analyzer_test.py     | 18 +++++++++---------
 5 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index 7fd261bfd5..c4ae5b79e4 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -117,7 +117,6 @@ cc_test(
     srcs = ["grappler_item_builder_test.cc"],
     deps = [
         ":grappler_item_builder",
-        "//google/protobuf:any",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:functional_ops",
         "//tensorflow/cc:grad_testutil",
diff --git a/tensorflow/core/profiler/internal/print_model_analysis.cc b/tensorflow/core/profiler/internal/print_model_analysis.cc
index 65b54f01aa..fd46b957e8 100644
--- a/tensorflow/core/profiler/internal/print_model_analysis.cc
+++ b/tensorflow/core/profiler/internal/print_model_analysis.cc
@@ -87,7 +87,11 @@ bool NewProfiler(const string* graph, const string* op_log) {
   CHECK(!tf_stat) << "Currently only 1 living tfprof profiler is allowed";
   CHECK(graph) << "graph mustn't be null";
   std::unique_ptr graph_ptr(new GraphDef());
-  graph_ptr->ParseFromString(*graph);
+  if (!graph_ptr->ParseFromString(*graph)) {
+    if (!protobuf::TextFormat::ParseFromString(*graph, graph_ptr.get())) {
+      fprintf(stderr, "Failed to parse graph\n");
+    }
+  }
 
   std::unique_ptr op_log_ptr;
   if (op_log && !op_log->empty()) {
diff --git a/tensorflow/core/profiler/internal/tfprof_code.cc b/tensorflow/core/profiler/internal/tfprof_code.cc
index 7f4d682cda..c9c0baa908 100644
--- a/tensorflow/core/profiler/internal/tfprof_code.cc
+++ b/tensorflow/core/profiler/internal/tfprof_code.cc
@@ -44,11 +44,6 @@ string GetTraceString(const CodeDef::Trace& trace) {
   } else {
     ntrace += ":" + trace.function().substr(0, 17) + "...";
   }
-  if (trace.line().length() < 20) {
-    ntrace += ":" + trace.line();
-  } else {
-    ntrace += ":" + trace.line().substr(0, 17) + "...";
-  }
   return ntrace;
 }
 
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc
index 012c49525a..81db766796 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.cc
+++ b/tensorflow/core/profiler/internal/tfprof_stats.cc
@@ -125,7 +125,7 @@ const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
   if (!Validate(opts)) {
     return empty_multi_graph_node_;
   }
-  if (cmd == kCmds[2]) {
+  if (cmd == kCmds[2] && has_code_traces()) {
     return code_view_->Show(opts);
   } else if (cmd == kCmds[3]) {
     return op_view_->Show(opts);
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 9492cadb8b..3432765b60 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -215,7 +215,7 @@ class PrintModelAnalysisTest(test.TestCase):
       with gfile.Open(outfile, 'r') as f:
         lines = f.read().split('\n')
         result = '\n'.join([l[:min(len(l), 80)] for l in lines])
-        self.assertEqual('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/91.04k flops)\n  model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (0/1.80k para\n    model_analyzer_testlib.py:35:BuildSmallModel:image = array_ops... (0/0 param\n    model_analyzer_testlib.py:39:BuildSmallModel:initializer=init_... (0/4 param\n    model_analyzer_testlib.py:43:BuildSmallModel:initializer=init_... (0/648 par\n    model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n    model_analyzer_testlib.py:48:BuildSmallModel:initializer=init_... (0/1.15k p\n    model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n  model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (gradient) (0\n    model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (gradient)\n    model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (gradient)\n  model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (0/1.04k para\n  model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (gradient) (0\n  model_analyzer_testlib.py:64:BuildFullModel:target = array_op... (0/0 params, \n  model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (0/0 params, \n  model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (gradient) (0\n  model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min... (0/0 params, \n',
+        self.assertEqual('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/91.04k flops)\n  model_analyzer_testlib.py:58:BuildFullModel (0/1.80k params, 0/41.76k flops)\n    model_analyzer_testlib.py:35:BuildSmallModel (0/0 params, 0/0 flops)\n    model_analyzer_testlib.py:39:BuildSmallModel (0/4 params, 0/0 flops)\n    model_analyzer_testlib.py:43:BuildSmallModel (0/648 params, 0/0 flops)\n    model_analyzer_testlib.py:44:BuildSmallModel (0/0 params, 0/23.33k flops)\n    model_analyzer_testlib.py:48:BuildSmallModel (0/1.15k params, 0/0 flops)\n    model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/18.43k flops)\n  model_analyzer_testlib.py:58:BuildFullModel (gradient) (0/0 params, 0/0 flops)\n    model_analyzer_testlib.py:44:BuildSmallModel (gradient) (0/0 params, 0/0 flo\n    model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/0 flo\n  model_analyzer_testlib.py:62:BuildFullModel (0/1.04k params, 0/16.51k flops)\n  model_analyzer_testlib.py:62:BuildFullModel (gradient) (0/0 params, 0/32.77k f\n  model_analyzer_testlib.py:64:BuildFullModel (0/0 params, 0/0 flops)\n  model_analyzer_testlib.py:65:BuildFullModel (0/0 params, 0/0 flops)\n  model_analyzer_testlib.py:65:BuildFullModel (gradient) (0/0 params, 0/0 flops)\n  model_analyzer_testlib.py:67:BuildFullModel (0/0 params, 0/0 flops)\n',
                          result)
 
       self.assertLess(0, tfprof_node.total_exec_micros)
@@ -224,28 +224,28 @@ class PrintModelAnalysisTest(test.TestCase):
       self.assertEqual(8, len(tfprof_node.children))
       self.assertEqual('_TFProfRoot', tfprof_node.name)
       self.assertEqual(
-          'model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_...',
+          'model_analyzer_testlib.py:58:BuildFullModel',
           tfprof_node.children[0].name)
       self.assertEqual(
-          'model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (gradient)',
+          'model_analyzer_testlib.py:58:BuildFullModel (gradient)',
           tfprof_node.children[1].name)
       self.assertEqual(
-          'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c...',
+          'model_analyzer_testlib.py:62:BuildFullModel',
           tfprof_node.children[2].name)
       self.assertEqual(
-          'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (gradient)',
+          'model_analyzer_testlib.py:62:BuildFullModel (gradient)',
           tfprof_node.children[3].name)
       self.assertEqual(
-          'model_analyzer_testlib.py:64:BuildFullModel:target = array_op...',
+          'model_analyzer_testlib.py:64:BuildFullModel',
           tfprof_node.children[4].name)
       self.assertEqual(
-          'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_...',
+          'model_analyzer_testlib.py:65:BuildFullModel',
           tfprof_node.children[5].name)
       self.assertEqual(
-          'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (gradient)',
+          'model_analyzer_testlib.py:65:BuildFullModel (gradient)',
           tfprof_node.children[6].name)
       self.assertEqual(
-          'model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min...',
+          'model_analyzer_testlib.py:67:BuildFullModel',
           tfprof_node.children[7].name)
       # pylint: enable=line-too-long
 
-- 
GitLab


From ccad7da043fb10af3467bac989bff4b73db34210 Mon Sep 17 00:00:00 2001
From: Yao Zhang 
Date: Mon, 28 Aug 2017 19:52:17 -0700
Subject: [PATCH 050/294] Remove a duplicated node in test.

PiperOrigin-RevId: 166794759
---
 tensorflow/core/grappler/utils/frame_test.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/core/grappler/utils/frame_test.cc b/tensorflow/core/grappler/utils/frame_test.cc
index 95ac857302..30673eed7a 100644
--- a/tensorflow/core/grappler/utils/frame_test.cc
+++ b/tensorflow/core/grappler/utils/frame_test.cc
@@ -161,7 +161,6 @@ TEST_F(IdentifyFramesTest, MultipleEnterNodes) {
   *graph.add_node() = CreateNode("5", {});
   *graph.add_node() = CreateNode("4", "Enter", frame, {"5"});
   *graph.add_node() = CreateNode("3", {"4", "2"});
-  *graph.add_node() = CreateNode("4", {"5"});
   *graph.add_node() = CreateNode("6", "Merge", {"3", "8"});
   *graph.add_node() = CreateNode("7", "Switch", {"6"});
   *graph.add_node() = CreateNode("8", "NextIteration", {"7"});
-- 
GitLab


From 2fa844c88047afcc519ec60a1f74f1cbc03e99cc Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy 
Date: Mon, 28 Aug 2017 20:46:24 -0700
Subject: [PATCH 051/294] Double the tolerance in nn_grad_test to avoid flakes.

PiperOrigin-RevId: 166798376
---
 tensorflow/cc/gradients/nn_grad_test.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index a02d36549b..f9a512f29e 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -36,7 +36,7 @@ class NNGradTest : public ::testing::Test {
     float max_error;
     TF_ASSERT_OK(ComputeGradientError(scope_, {x}, {x_shape}, {y}, {y_shape},
                                       &max_error));
-    EXPECT_LT(max_error, 1e-4);
+    EXPECT_LT(max_error, 2e-4);
   }
 
   void RunTest(const Output& x, const Tensor& x_init_value, const Output& y,
@@ -44,7 +44,7 @@ class NNGradTest : public ::testing::Test {
     float max_error;
     TF_ASSERT_OK(
         ComputeGradientError(scope_, x, x_init_value, y, y_shape, &max_error));
-    EXPECT_LT(max_error, 1e-4);
+    EXPECT_LT(max_error, 2e-4);
   }
 
   void RunTest(const OutputList& xs, const std::vector& x_shapes,
@@ -53,7 +53,7 @@ class NNGradTest : public ::testing::Test {
     float max_error;
     TF_ASSERT_OK(
         ComputeGradientError(scope_, xs, x_shapes, ys, y_shapes, &max_error));
-    EXPECT_LT(max_error, 1e-4);
+    EXPECT_LT(max_error, 2e-4);
   }
 
   Scope scope_;
-- 
GitLab


From 5396e8ee172da2d3b5e20fae775facf91cd0490c Mon Sep 17 00:00:00 2001
From: Benoit Steiner 
Date: Mon, 28 Aug 2017 22:20:39 -0700
Subject: [PATCH 052/294] Avoid duplicating the functionality in
 GrapplerItem::NodesToPreserve()

PiperOrigin-RevId: 166804348
---
 tensorflow/core/grappler/optimizers/constant_folding.cc | 7 +------
 tensorflow/core/grappler/optimizers/constant_folding.h  | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index a830af05e1..ada166703e 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -844,12 +844,7 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
                                  GraphDef* output) {
   graph_ = item.graph;
   node_map_.reset(new NodeMap(&graph_));
-  for (const auto& node : item.fetch) {
-    nodes_to_preserve_.insert(NodeName(node));
-  }
-  for (const auto& node : item.feed) {
-    nodes_to_preserve_.insert(NodeName(node.first));
-  }
+  nodes_to_preserve_ = item.NodesToPreserve();
   device_.reset(new DeviceSimple());
   *output = GraphDef();
 
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h
index 26a984205b..0c1c40dfd3 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.h
+++ b/tensorflow/core/grappler/optimizers/constant_folding.h
@@ -73,7 +73,7 @@ class ConstantFolding : public GraphOptimizer {
   std::unique_ptr resource_mgr_;
   GraphDef graph_;
   std::unique_ptr node_map_;
-  std::set nodes_to_preserve_;
+  std::unordered_set nodes_to_preserve_;
   GraphDef added_graph_;
   bool has_fetch_;
 };
-- 
GitLab


From c32d401c51ce004dcd33811e7c9027bf259f38a0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Mon, 28 Aug 2017 22:33:32 -0700
Subject: [PATCH 053/294] Optimize convert_to_tensor.

PiperOrigin-RevId: 166805061
---
 tensorflow/c/eager/c_api.cc                   |   6 +-
 .../contrib/nccl/kernels/nccl_manager.h       |   2 +-
 tensorflow/contrib/nccl/kernels/nccl_ops.cc   |   2 +-
 tensorflow/python/eager/ops_test.py           |   2 +-
 tensorflow/python/framework/ops.py            | 140 ++++++++++--------
 5 files changed, 87 insertions(+), 65 deletions(-)

diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index b1baa5ce12..01e251a1ac 100644
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -30,6 +30,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/rendezvous_mgr.h"
 #include "tensorflow/core/framework/rendezvous.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/refcount.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/gtl/stl_util.h"
@@ -451,8 +452,9 @@ tensorflow::Status ValidateInputTypeAndPlacement(
       return tensorflow::errors::InvalidArgument(
           "cannot compute ", op->name, " as input #", i,
           " was expected to be a ",
-          tensorflow::DataType_Name(kernel->input_type(i)), " tensor but is a ",
-          tensorflow::DataType_Name(op->inputs[i].dtype()), " tensor");
+          tensorflow::DataTypeString(kernel->input_type(i)),
+          " tensor but is a ",
+          tensorflow::DataTypeString(op->inputs[i].dtype()), " tensor");
     }
   }
   return tensorflow::Status::OK();
diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.h b/tensorflow/contrib/nccl/kernels/nccl_manager.h
index 1a661e8f7f..6e2f8e953a 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_manager.h
+++ b/tensorflow/contrib/nccl/kernels/nccl_manager.h
@@ -20,7 +20,7 @@ limitations under the License.
 #include 
 #include 
 
-#include "external/nccl_archive/src/nccl.h"
+#include "src/nccl.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/platform/mutex.h"
diff --git a/tensorflow/contrib/nccl/kernels/nccl_ops.cc b/tensorflow/contrib/nccl/kernels/nccl_ops.cc
index 3c532e3d73..d4455483f7 100644
--- a/tensorflow/contrib/nccl/kernels/nccl_ops.cc
+++ b/tensorflow/contrib/nccl/kernels/nccl_ops.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include 
 #include 
 
-#include "external/nccl_archive/src/nccl.h"
+#include "src/nccl.h"
 #include "tensorflow/contrib/nccl/kernels/nccl_manager.h"
 #include "tensorflow/core/framework/op_kernel.h"
 
diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py
index 78ff2f6777..d1cb3508cf 100644
--- a/tensorflow/python/eager/ops_test.py
+++ b/tensorflow/python/eager/ops_test.py
@@ -272,7 +272,7 @@ class TargetTest(test_util.TensorFlowTestCase):
 
   def testInvalidInputDataType(self):
     # Fill requires the first input to be an int32 tensor.
-    with self.assertRaisesRegexp(ValueError, 'int64'):
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, 'int64'):
       array_ops.fill(tensor.Tensor([2], dtype=dtypes.int64), tensor.Tensor(1))
 
   def testOutputOnHostMemory(self):
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 4284a51a57..5a0c323ce4 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -925,6 +925,8 @@ def _TensorTensorConversionFunction(t, dtype=None, name=None, as_ref=False):
 _tensor_conversion_func_registry = {
     0: [(Tensor, _TensorTensorConversionFunction)]
 }
+_tensor_conversion_func_cache = {}
+_tensor_conversion_func_lock = threading.Lock()
 register_dense_tensor_like_type(Tensor)
 
 
@@ -981,6 +983,10 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None):
       as_ref=False)
 
 
+def _error_prefix(name):
+  return "" if name is None else "%s: " % name
+
+
 def internal_convert_to_tensor(value,
                                dtype=None,
                                name=None,
@@ -1017,59 +1023,70 @@ def internal_convert_to_tensor(value,
     RuntimeError: If a registered conversion function returns an invalid value.
 
   """
-  # Fast path for removing unnecessary convert-to-tensors.
-  if (isinstance(ag_core.getval(value), Tensor)
-      and (dtype is None or dtype == value.dtype)
-      and context.in_eager_mode()):
+  # Note we check the type of the object unwrapped from an autograd node, if
+  # tracing gradients, to ensure the same behavior happens with and without
+  # tracing.
+  unwrapped = ag_core.getval(value)
+  # Fast path for EagerTensors that don't need any conversion.
+  if isinstance(unwrapped, EagerTensor) and context.in_eager_mode():
+    # Note that we don't check that value's dtype matches the dtype
+    # argument.  We exepct that the C runtime will do that checking
+    # when we execute the kernel.
     return value
-  error_prefix = "" if name is None else "%s: " % name
   if dtype is not None:
     dtype = dtypes.as_dtype(dtype)
-  for _, funcs_at_priority in sorted(_tensor_conversion_func_registry.items()):
-    for base_type, conversion_func in funcs_at_priority:
-      # Note we check the type of the object unwrapped from an autograd node, if
-      # tracing gradients, to ensure the same behavior happens with and without
-      # tracing.
-      if isinstance(ag_core.getval(value), base_type):
-        # If dtype is None but preferred_dtype is not None, we try to
-        # cast to preferred_dtype first.
+  unwrapped_type = type(unwrapped)
+  conversion_func_list = _tensor_conversion_func_cache.get(unwrapped_type, None)
+  if conversion_func_list is None:
+    with _tensor_conversion_func_lock:
+      conversion_func_list = []
+      for _, funcs_at_priority in sorted(
+          _tensor_conversion_func_registry.items()):
+        for base_type, conversion_func in funcs_at_priority:
+          if isinstance(unwrapped, base_type):
+            conversion_func_list.append((base_type, conversion_func))
+      _tensor_conversion_func_cache[unwrapped_type] = conversion_func_list
+
+  for base_type, conversion_func in conversion_func_list:
+    # If dtype is None but preferred_dtype is not None, we try to
+    # cast to preferred_dtype first.
+    ret = None
+    if dtype is None and preferred_dtype is not None:
+      try:
+        ret = conversion_func(
+            value, dtype=preferred_dtype, name=name, as_ref=as_ref)
+      except (TypeError, ValueError):
+        # Could not coerce the conversion to use the preferred dtype.
         ret = None
-        if dtype is None and preferred_dtype is not None:
-          try:
-            ret = conversion_func(
-                value, dtype=preferred_dtype, name=name, as_ref=as_ref)
-          except (TypeError, ValueError):
-            # Could not coerce the conversion to use the preferred dtype.
-            ret = None
-
-          if ret is not None and ret is not NotImplemented:
-            if (ret.dtype.base_dtype !=
-                dtypes.as_dtype(preferred_dtype).base_dtype):
-              raise TypeError("convert_to_tensor did not convert to "
-                              "the preferred dtype: %s vs %s " %
-                              (ret.dtype.base_dtype,
-                               dtypes.as_dtype(preferred_dtype).base_dtype))
-
-        if ret is None:
-          ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
-
-        if ret is NotImplemented:
-          continue
-
-        if not isinstance(ag_core.getval(ret), Tensor):
-          raise RuntimeError(
-              "%sConversion function %r for type %s returned non-Tensor: %r" %
-              (error_prefix, conversion_func, base_type, ret))
-        if dtype and not dtype.is_compatible_with(ret.dtype):
-          raise RuntimeError(
-              "%sConversion function %r for type %s returned incompatible "
-              "dtype: requested = %s, actual = %s" %
-              (error_prefix, conversion_func, base_type, dtype.name,
-               ret.dtype.name))
-        return ret
+
+      if ret is not None and ret is not NotImplemented:
+        if (ret.dtype.base_dtype !=
+            dtypes.as_dtype(preferred_dtype).base_dtype):
+          raise TypeError("convert_to_tensor did not convert to "
+                          "the preferred dtype: %s vs %s " %
+                          (ret.dtype.base_dtype,
+                           dtypes.as_dtype(preferred_dtype).base_dtype))
+
+    if ret is None:
+      ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
+
+    if ret is NotImplemented:
+      continue
+
+    if not isinstance(ag_core.getval(ret), Tensor):
+      raise RuntimeError(
+          "%sConversion function %r for type %s returned non-Tensor: %r" %
+          (_error_prefix(name), conversion_func, base_type, ret))
+    if dtype and not dtype.is_compatible_with(ret.dtype):
+      raise RuntimeError(
+          "%sConversion function %r for type %s returned incompatible "
+          "dtype: requested = %s, actual = %s" %
+          (_error_prefix(name), conversion_func, base_type, dtype.name,
+           ret.dtype.name))
+    return ret
   raise TypeError("%sCannot convert %r with type %s to Tensor: "
-                  "no conversion function registered." % (error_prefix, value,
-                                                          type(value)))
+                  "no conversion function registered." %
+                  (_error_prefix(name), value, unwrapped_type))
 
 
 def internal_convert_n_to_tensor(values,
@@ -1319,19 +1336,22 @@ def register_tensor_conversion_function(base_type,
     TypeError: If the arguments do not have the appropriate type.
 
   """
-  if not (isinstance(base_type, type) or
-          (isinstance(base_type, tuple) and
-           all(isinstance(x, type) for x in base_type))):
-    raise TypeError("base_type must be a type or a tuple of types.")
-  if not callable(conversion_func):
-    raise TypeError("conversion_func must be callable.")
+  global _tensor_conversion_func_cache
+  with _tensor_conversion_func_lock:
+    if not (isinstance(base_type, type) or
+            (isinstance(base_type, tuple) and
+             all(isinstance(x, type) for x in base_type))):
+      raise TypeError("base_type must be a type or a tuple of types.")
+    if not callable(conversion_func):
+      raise TypeError("conversion_func must be callable.")
 
-  try:
-    funcs_at_priority = _tensor_conversion_func_registry[priority]
-  except KeyError:
-    funcs_at_priority = []
-    _tensor_conversion_func_registry[priority] = funcs_at_priority
-  funcs_at_priority.append((base_type, conversion_func))
+    try:
+      funcs_at_priority = _tensor_conversion_func_registry[priority]
+    except KeyError:
+      funcs_at_priority = []
+      _tensor_conversion_func_registry[priority] = funcs_at_priority
+    funcs_at_priority.append((base_type, conversion_func))
+    _tensor_conversion_func_cache = {}
 
 
 class IndexedSlices(_TensorLike):
-- 
GitLab


From a0cb8c528604819724059da1bbfbf8f2b95fda98 Mon Sep 17 00:00:00 2001
From: Yao Zhang 
Date: Mon, 28 Aug 2017 23:24:45 -0700
Subject: [PATCH 054/294] Support loop for layout optimizer.

PiperOrigin-RevId: 166808322
---
 tensorflow/core/grappler/optimizers/BUILD     |   1 +
 .../grappler/optimizers/constant_folding.cc   |   4 -
 .../grappler/optimizers/layout_optimizer.cc   | 490 ++++++++++++------
 tensorflow/core/grappler/utils.cc             |   8 +
 tensorflow/core/grappler/utils.h              |   8 +
 .../python/grappler/layout_optimizer_test.py  |  52 +-
 6 files changed, 401 insertions(+), 162 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD
index a7515786a0..659451e991 100644
--- a/tensorflow/core/grappler/optimizers/BUILD
+++ b/tensorflow/core/grappler/optimizers/BUILD
@@ -275,6 +275,7 @@ cc_library(
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/clusters:cluster",
         "//tensorflow/core/grappler/costs:graph_properties",
+        "//tensorflow/core/grappler/utils:frame",
     ],
 )
 
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index ada166703e..35b0b7c163 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -94,10 +94,6 @@ class DeviceSimple : public DeviceBase {
   std::unique_ptr eigen_device_;
 };
 
-string AsControlDependency(const NodeDef& node) {
-  return strings::StrCat("^", node.name());
-}
-
 }  // namespace
 
 ConstantFolding::ConstantFolding() {
diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
index f469f9a9ac..a4b0a60e1f 100644
--- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/optimizers/layout_optimizer.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/grappler/utils/frame.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 
@@ -95,10 +96,84 @@ bool IsNodeNCHWToNHWC(const string& node_name) {
   return false;
 }
 
-class NodeProcessor {
+class GraphProcessor {
  public:
-  NodeProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : graph_(graph), node_(node), node_map_(node_map) {}
+  GraphProcessor(GraphDef* graph, NodeMap* node_map)
+      : graph_(graph), node_map_(node_map) {}
+
+ protected:
+  NodeDef* AddNodePermConst(const string& name, const string& device,
+                            const std::vector& permutation) {
+    NodeDef* node = graph_->add_node();
+    node_map_->AddNode(name, node);
+    node->set_name(name);
+    node->set_op("Const");
+    node->set_device(device);
+    AttrValue attr_data_type;
+    attr_data_type.set_type(DT_INT32);
+    node->mutable_attr()->insert({"dtype", attr_data_type});
+    AttrValue attr_tensor;
+    Tensor tensor(DT_INT32, TensorShape({4}));
+    for (int i = 0; static_cast(i) < permutation.size(); i++) {
+      tensor.flat()(i) = permutation[i];
+    }
+    tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
+    node->mutable_attr()->insert({"value", attr_tensor});
+    return node;
+  }
+
+  NodeDef* AddNodeConstScalar(const string& name, const string& device,
+                              DataType dtype, int value) {
+    NodeDef* node = graph_->add_node();
+    node_map_->AddNode(name, node);
+    node->set_name(name);
+    node->set_op("Const");
+    node->set_device(device);
+    AttrValue attr_data_type;
+    attr_data_type.set_type(dtype);
+    node->mutable_attr()->insert({"dtype", attr_data_type});
+    AttrValue attr_tensor;
+    Tensor tensor(dtype, TensorShape({}));
+    tensor.scalar()() = value;
+    tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
+    node->mutable_attr()->insert({"value", attr_tensor});
+    return node;
+  }
+
+  NodeDef* AddNodeReductionConst(const string& name, const string& device) {
+    NodeDef* node = graph_->add_node();
+    node_map_->AddNode(name, node);
+    node->set_name(name);
+    node->set_op("Const");
+    node->set_device(device);
+    AttrValue attr_data_type;
+    attr_data_type.set_type(DT_INT32);
+    node->mutable_attr()->insert({"dtype", attr_data_type});
+
+    AttrValue attr_tensor;
+    Tensor tensor(DT_INT32, TensorShape({3}));
+    std::vector axis = {0, 2, 3};
+    for (int i = 0; static_cast(i) < axis.size(); i++) {
+      tensor.flat()(i) = axis[i];
+    }
+    tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
+    node->mutable_attr()->insert({"value", attr_tensor});
+    return node;
+  }
+
+  GraphDef* graph_;
+  NodeMap* node_map_;
+
+ private:
+};
+
+class NodeProcessor : public GraphProcessor {
+ public:
+  NodeProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                bool is_in_frame)
+      : GraphProcessor(graph, node_map),
+        node_(node),
+        is_in_frame_(is_in_frame) {}
   virtual ~NodeProcessor() {}
   virtual Status ConvertNode() {
     if (ShouldProcess()) {
@@ -229,14 +304,14 @@ class NodeProcessor {
   }
 
   NodeDef* AddNodeTranspose(const string& node_name, const string& input_name,
-                            DataType data_type,
+                            const string& const_name, DataType data_type,
                             const TensorShapeProto& input_shape,
                             bool NHWCToNCHW) {
     NodeDef* node = graph_->add_node();
     node_map_->AddNode(node_name, node);
     node->set_name(node_name);
     *node->add_input() = input_name;
-    *node->add_input() = NHWCToNCHW ? kPermNHWCToNCHW : kPermNCHWToNHWC;
+    *node->add_input() = const_name;
     node->set_op("Transpose");
     node->set_device(node_->device());
     AttrValue attr_data_type;
@@ -276,8 +351,10 @@ class NodeProcessor {
       auto input_node = node_map_->GetNode(node_->input(pos));
       TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
       TF_RETURN_IF_ERROR(HasAttribute(*input_node, "_output_shapes"));
+      string const_name = GetOrAddNodePermNHWCToNCHW(pos);
       AddNodeTranspose(
-          node_name, node_->input(pos), node_->attr().at("T").type(),
+          node_name, node_->input(pos), const_name,
+          node_->attr().at("T").type(),
           input_node->attr().at("_output_shapes").list().shape(output_pos),
           true);
       node_map_->UpdateOutput(node_->input(pos), node_->name(), node_name);
@@ -289,6 +366,7 @@ class NodeProcessor {
 
   virtual Status AddLayoutTransposeToOutputs() {
     auto outputs = node_map_->GetOutputs(node_->name());
+    string const_name = GetOrAddNodePermNCHWToNHWC();
     for (const auto& output : outputs) {
       string base_name = strings::StrCat(node_->name(), "-", output->name());
       string node_name =
@@ -315,9 +393,9 @@ class NodeProcessor {
       }
       TF_RETURN_IF_ERROR(HasAttribute(*node_, "T"));
       TF_RETURN_IF_ERROR(HasAttribute(*node_, "_output_shapes"));
-      AddNodeTranspose(node_name, node_->name(), node_->attr().at("T").type(),
-                       node_->attr().at("_output_shapes").list().shape(0),
-                       false);
+      AddNodeTranspose(
+          node_name, node_->name(), const_name, node_->attr().at("T").type(),
+          node_->attr().at("_output_shapes").list().shape(0), false);
       *it = node_name;
       node_map_->UpdateOutput(node_->name(), output->name(), node_name);
       node_map_->AddOutput(node_name, output->name());
@@ -327,11 +405,56 @@ class NodeProcessor {
 
   virtual Status CustomizedProcessing() { return Status::OK(); }
 
-  GraphDef* graph_;
+  NodeDef* AddNodePermNHWCToNCHW(const string& suffix,
+                                 const string& depended_node,
+                                 const string& device) {
+    auto const_node = AddNodePermConst(
+        strings::StrCat(kPermNHWCToNCHW, "-", suffix), device, {0, 3, 1, 2});
+    // This is to ensure the transpose node and the const node are in the
+    // same frame.
+    *const_node->add_input() = AsControlDependency(depended_node);
+    return const_node;
+  }
+
+  NodeDef* AddNodePermNCHWToNHWC(const string& suffix,
+                                 const string& depended_node,
+                                 const string& device) {
+    auto const_node = AddNodePermConst(
+        strings::StrCat(kPermNCHWToNHWC, "-", suffix), device, {0, 2, 3, 1});
+    // This is to ensure the transpose node and the const node are in the same
+    // frame.
+    *const_node->add_input() = AsControlDependency(depended_node);
+    return const_node;
+  }
+
   NodeDef* node_;
-  NodeMap* node_map_;
+  bool is_in_frame_;
 
  private:
+  string GetOrAddNodePermNHWCToNCHW(int pos) {
+    string const_name;
+    if (is_in_frame_) {
+      auto const_node = AddNodePermNHWCToNCHW(
+          node_->input(pos), NodeName(node_->input(pos)), node_->device());
+      const_name = const_node->name();
+    } else {
+      const_name = kPermNHWCToNCHW;
+    }
+    return const_name;
+  }
+
+  string GetOrAddNodePermNCHWToNHWC() {
+    string const_name;
+    if (is_in_frame_) {
+      auto const_node =
+          AddNodePermNCHWToNHWC(node_->name(), node_->name(), node_->device());
+      const_name = const_node->name();
+    } else {
+      const_name = kPermNCHWToNHWC;
+    }
+    return const_name;
+  }
+
   void UpdateTuple(AttrValue_ListValue* list) {
     int64 h = list->i(1);
     int64 w = list->i(2);
@@ -344,8 +467,9 @@ class NodeProcessor {
 
 class AvgPoolGradProcessor : public NodeProcessor {
  public:
-  AvgPoolGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : NodeProcessor(graph, node, node_map) {}
+  AvgPoolGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                       bool is_in_frame)
+      : NodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   std::vector GetInputPos() const override {
@@ -357,8 +481,9 @@ class AvgPoolGradProcessor : public NodeProcessor {
 
 class BiasAddGradProcessor : public NodeProcessor {
  public:
-  BiasAddGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : NodeProcessor(graph, node, node_map) {}
+  BiasAddGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                       bool is_in_frame)
+      : NodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -377,8 +502,8 @@ class BiasAddGradProcessor : public NodeProcessor {
 class Conv2DProcessor : public NodeProcessor {
  public:
   Conv2DProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
-                  bool no_gemm)
-      : NodeProcessor(graph, node, node_map), no_gemm_(no_gemm) {}
+                  bool no_gemm, bool is_in_frame)
+      : NodeProcessor(graph, node, node_map, is_in_frame), no_gemm_(no_gemm) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -447,8 +572,9 @@ class Conv2DProcessor : public NodeProcessor {
 class Conv2DBackpropFilterProcessor : public Conv2DProcessor {
  public:
   Conv2DBackpropFilterProcessor(GraphDef* graph, NodeDef* node,
-                                NodeMap* node_map, bool no_gemm)
-      : Conv2DProcessor(graph, node, node_map, no_gemm) {}
+                                NodeMap* node_map, bool no_gemm,
+                                bool is_in_frame)
+      : Conv2DProcessor(graph, node, node_map, no_gemm, is_in_frame) {}
 
  protected:
   bool IsGemmUsed() const override {
@@ -472,8 +598,9 @@ class Conv2DBackpropFilterProcessor : public Conv2DProcessor {
 class Conv2DBackpropInputProcessor : public Conv2DProcessor {
  public:
   Conv2DBackpropInputProcessor(GraphDef* graph, NodeDef* node,
-                               NodeMap* node_map, bool no_gemm)
-      : Conv2DProcessor(graph, node, node_map, no_gemm) {}
+                               NodeMap* node_map, bool no_gemm,
+                               bool is_in_frame)
+      : Conv2DProcessor(graph, node, node_map, no_gemm, is_in_frame) {}
 
  protected:
   bool IsGemmUsed() const override {
@@ -492,8 +619,9 @@ class Conv2DBackpropInputProcessor : public Conv2DProcessor {
 
 class FusedBatchNormGradProcessor : public NodeProcessor {
  public:
-  FusedBatchNormGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : NodeProcessor(graph, node, node_map) {}
+  FusedBatchNormGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                              bool is_in_frame)
+      : NodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   std::vector GetInputPos() const override {
@@ -504,8 +632,9 @@ class FusedBatchNormGradProcessor : public NodeProcessor {
 
 class MaxPoolGradProcessor : public NodeProcessor {
  public:
-  MaxPoolGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : NodeProcessor(graph, node, node_map) {}
+  MaxPoolGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                       bool is_in_frame)
+      : NodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   std::vector GetInputPos() const override {
@@ -516,8 +645,9 @@ class MaxPoolGradProcessor : public NodeProcessor {
 
 class AgnosticNodeProcessor : public NodeProcessor {
  public:
-  AgnosticNodeProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : NodeProcessor(graph, node, node_map) {}
+  AgnosticNodeProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                        bool is_in_frame)
+      : NodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -548,8 +678,9 @@ class AgnosticNodeProcessor : public NodeProcessor {
 
 class AddNProcessor : public AgnosticNodeProcessor {
  public:
-  AddNProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : AgnosticNodeProcessor(graph, node, node_map) {}
+  AddNProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                bool is_in_frame)
+      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   std::vector GetInputPos() const override {
@@ -564,8 +695,9 @@ class AddNProcessor : public AgnosticNodeProcessor {
 
 class BinaryOpProcessor : public AgnosticNodeProcessor {
  public:
-  BinaryOpProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : AgnosticNodeProcessor(graph, node, node_map) {
+  BinaryOpProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                    bool is_in_frame)
+      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {
     is_4d_with_vector_ = Is4DOperateWithVector();
   }
 
@@ -672,8 +804,9 @@ class BinaryOpProcessor : public AgnosticNodeProcessor {
 
 class ConcatProcessor : public AgnosticNodeProcessor {
  public:
-  ConcatProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : AgnosticNodeProcessor(graph, node, node_map) {
+  ConcatProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                  bool is_in_frame)
+      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {
     // For Concat,  the concat axis is the first input; for ConcatV2,
     // the last input.
     axis_node_pos_ =
@@ -698,8 +831,9 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   }
 
   Status CustomizedProcessing() override {
-    node_map_->AddOutput(kConcatConst, node_->name());
-    *node_->mutable_input(axis_node_pos_) = kConcatConst;
+    string concat_const_name = GetOrAddNodeConcatConst();
+    node_map_->AddOutput(concat_const_name, node_->name());
+    *node_->mutable_input(axis_node_pos_) = concat_const_name;
     return Status::OK();
   }
 
@@ -712,12 +846,38 @@ class ConcatProcessor : public AgnosticNodeProcessor {
   }
 
   int axis_node_pos_;
+
+ private:
+  NodeDef* AddNodeConcatConst(const string& suffix, const string& depended_node,
+                              const string& device) {
+    auto const_node = AddNodeConstScalar(
+        strings::StrCat(kConcatConst, "-", suffix), device, DT_INT32, 1);
+    // This is to ensure the concat node and the const node are
+    // in the same frame.
+    *const_node->add_input() = AsControlDependency(depended_node);
+    return const_node;
+  }
+
+  string GetOrAddNodeConcatConst() {
+    string const_name;
+    if (is_in_frame_) {
+      int value_node_pos = (axis_node_pos_ == 0) ? 1 : 0;
+      auto const_node = AddNodeConcatConst(
+          node_->name(), NodeName(node_->input(value_node_pos)),
+          node_->device());
+      const_name = const_node->name();
+    } else {
+      const_name = kConcatConst;
+    }
+    return const_name;
+  }
 };
 
 class ReluGradProcessor : public AgnosticNodeProcessor {
  public:
-  ReluGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : AgnosticNodeProcessor(graph, node, node_map) {}
+  ReluGradProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                    bool is_in_frame)
+      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   std::vector GetInputPos() const override {
@@ -728,8 +888,9 @@ class ReluGradProcessor : public AgnosticNodeProcessor {
 
 class SliceProcessor : public AgnosticNodeProcessor {
  public:
-  SliceProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : AgnosticNodeProcessor(graph, node, node_map) {}
+  SliceProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                 bool is_in_frame)
+      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   Status CustomizedProcessing() override {
@@ -749,14 +910,62 @@ class SliceProcessor : public AgnosticNodeProcessor {
   }
 
  private:
+  NodeDef* AddNodeGatherAxisConst(const string& suffix,
+                                  const string& depended_node,
+                                  const string& device) {
+    auto const_node = AddNodeConstScalar(
+        strings::StrCat(kGatherAxisConst, "-", suffix), device, DT_INT32, 0);
+    // This is to ensure the Slice node and the const node are
+    // in the same frame.
+    *const_node->add_input() = AsControlDependency(depended_node);
+    return const_node;
+  }
+
+  string GetOrAddNodeGatherAxisConst() {
+    string const_name;
+    if (is_in_frame_) {
+      auto const_node = AddNodeGatherAxisConst(
+          node_->name(), NodeName(node_->input(0)), node_->device());
+      const_name = const_node->name();
+    } else {
+      const_name = kGatherAxisConst;
+    }
+    return const_name;
+  }
+
+  string GetOrAddNodePermNHWCToNCHW() {
+    string const_name;
+    if (is_in_frame_) {
+      auto const_node = AddNodePermNHWCToNCHW(
+          node_->name(), NodeName(node_->input(0)), node_->device());
+      const_name = const_node->name();
+    } else {
+      const_name = kPermNHWCToNCHW;
+    }
+    return const_name;
+  }
+
+  string GetOrAddNodePermNCHWToNHWC() {
+    string const_name;
+    if (is_in_frame_) {
+      auto const_node = AddNodePermNCHWToNHWC(
+          node_->name(), NodeName(node_->input(0)), node_->device());
+      const_name = const_node->name();
+    } else {
+      const_name = kPermNCHWToNHWC;
+    }
+    return const_name;
+  }
+
   void AddNodePermVec(const string& node_name, const string& input_name,
                       DataType data_type, bool NHWCToNCHW) {
     NodeDef* node = graph_->add_node();
     node_map_->AddNode(node_name, node);
     node->set_name(node_name);
     *node->add_input() = input_name;
-    *node->add_input() = NHWCToNCHW ? kPermNHWCToNCHW : kPermNCHWToNHWC;
-    *node->add_input() = kGatherAxisConst;
+    *node->add_input() = NHWCToNCHW ? GetOrAddNodePermNHWCToNCHW()
+                                    : GetOrAddNodePermNCHWToNHWC();
+    *node->add_input() = GetOrAddNodeGatherAxisConst();
     node->set_op("GatherV2");
 
     AttrValue attr_type_indices;
@@ -782,8 +991,9 @@ class SliceProcessor : public AgnosticNodeProcessor {
 // before this optimization.
 class SliceProcessorConst : public AgnosticNodeProcessor {
  public:
-  SliceProcessorConst(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : AgnosticNodeProcessor(graph, node, node_map) {}
+  SliceProcessorConst(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                      bool is_in_frame)
+      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   Status CustomizedProcessing() override {
@@ -799,8 +1009,9 @@ class SliceProcessorConst : public AgnosticNodeProcessor {
 // example use case is in the gradient computation of Concat for InceptionV3.
 class SliceProcessorConcatOffset : public AgnosticNodeProcessor {
  public:
-  SliceProcessorConcatOffset(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : AgnosticNodeProcessor(graph, node, node_map) {}
+  SliceProcessorConcatOffset(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                             bool is_in_frame)
+      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   Status CustomizedProcessing() override {
@@ -849,8 +1060,9 @@ class SliceProcessorConcatOffset : public AgnosticNodeProcessor {
 
 class SqueezeProcessor : public AgnosticNodeProcessor {
  public:
-  SqueezeProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : AgnosticNodeProcessor(graph, node, node_map) {}
+  SqueezeProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+                   bool is_in_frame)
+      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -898,8 +1110,9 @@ class SqueezeProcessor : public AgnosticNodeProcessor {
 
 class SumProcessor : public AgnosticNodeProcessor {
  public:
-  SumProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map)
-      : AgnosticNodeProcessor(graph, node, node_map) {}
+  SumProcessor(GraphDef* graph, NodeDef* node, NodeMap* node_map,
+               bool is_in_frame)
+      : AgnosticNodeProcessor(graph, node, node_map, is_in_frame) {}
 
  protected:
   bool ShouldProcess() const override {
@@ -913,7 +1126,7 @@ class SumProcessor : public AgnosticNodeProcessor {
 
   Status CustomizedProcessing() override {
     node_map_->AddOutput(kReductionConst, node_->name());
-    *node_->mutable_input(1) = kReductionConst;
+    *node_->mutable_input(1) = GetOrAddNodeReductionConst();
     return Status::OK();
   }
 
@@ -938,6 +1151,29 @@ class SumProcessor : public AgnosticNodeProcessor {
     }
     return false;
   }
+
+  NodeDef* AddNodeReductionConst(const string& suffix,
+                                 const string& depended_node,
+                                 const string& device) {
+    auto const_node = GraphProcessor::AddNodeReductionConst(
+        strings::StrCat(kReductionConst, "-", suffix), device);
+    // This is to ensure the Sum node and the const node are in the
+    // same frame.
+    *const_node->add_input() = AsControlDependency(depended_node);
+    return const_node;
+  }
+
+  string GetOrAddNodeReductionConst() {
+    string const_name;
+    if (is_in_frame_) {
+      auto const_node = AddNodeReductionConst(
+          node_->name(), NodeName(node_->input(0)), node_->device());
+      const_name = const_node->name();
+    } else {
+      const_name = kReductionConst;
+    }
+    return const_name;
+  }
 };
 
 struct TuningConfig {
@@ -951,13 +1187,12 @@ struct TuningConfig {
   bool no_gemm;
 };
 
-class DataLayoutOptimizer {
+class DataLayoutOptimizer : GraphProcessor {
  public:
   explicit DataLayoutOptimizer(const string& default_device, GraphDef* graph,
-                               TuningConfig config)
-      : default_device_(default_device),
-        graph_(graph),
-        node_map_(graph_),
+                               NodeMap* node_map, TuningConfig config)
+      : GraphProcessor(graph, node_map),
+        default_device_(default_device),
         config_(config) {}
 
   Status Optimize() {
@@ -970,105 +1205,65 @@ class DataLayoutOptimizer {
   }
 
  private:
-  NodeDef* AddNodePermConst(const string& name,
-                            const std::vector& permutation) {
-    NodeDef* node = graph_->add_node();
-    node_map_.AddNode(name, node);
-    node->set_name(name);
-    node->set_op("Const");
-    node->set_device(default_device_);
-    AttrValue attr_data_type;
-    attr_data_type.set_type(DT_INT32);
-    node->mutable_attr()->insert({"dtype", attr_data_type});
-    AttrValue attr_tensor;
-    Tensor tensor(DT_INT32, TensorShape({4}));
-    for (int i = 0; static_cast(i) < permutation.size(); i++) {
-      tensor.flat()(i) = permutation[i];
-    }
-    tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
-    node->mutable_attr()->insert({"value", attr_tensor});
-    return node;
+  NodeDef* AddNodePermNHWCToNCHW() {
+    return AddNodePermConst(kPermNHWCToNCHW, default_device_, {0, 3, 1, 2});
   }
 
-  NodeDef* AddConstScalar(const char* name, DataType dtype, int value) {
-    NodeDef* node = graph_->add_node();
-    node_map_.AddNode(name, node);
-    node->set_name(name);
-    node->set_op("Const");
-    node->set_device(default_device_);
-    AttrValue attr_data_type;
-    attr_data_type.set_type(dtype);
-    node->mutable_attr()->insert({"dtype", attr_data_type});
-    AttrValue attr_tensor;
-    Tensor tensor(dtype, TensorShape({}));
-    tensor.scalar()() = value;
-    tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
-    node->mutable_attr()->insert({"value", attr_tensor});
-    return node;
+  NodeDef* AddNodePermNCHWToNHWC() {
+    return AddNodePermConst(kPermNCHWToNHWC, default_device_, {0, 2, 3, 1});
   }
 
   NodeDef* AddNodeConcatConst() {
-    return AddConstScalar(kConcatConst, DT_INT32, 1);
+    return AddNodeConstScalar(kConcatConst, default_device_, DT_INT32, 1);
   }
 
-  NodeDef* AddGatherAxisConst() {
-    return AddConstScalar(kGatherAxisConst, DT_INT32, 0);
+  NodeDef* AddNodeGatherAxisConst() {
+    return AddNodeConstScalar(kGatherAxisConst, default_device_, DT_INT32, 0);
   }
 
   NodeDef* AddNodeReductionConst() {
-    NodeDef* node = graph_->add_node();
-    node_map_.AddNode(kReductionConst, node);
-    node->set_name(kReductionConst);
-    node->set_op("Const");
-    node->set_device(default_device_);
-    AttrValue attr_data_type;
-    attr_data_type.set_type(DT_INT32);
-    node->mutable_attr()->insert({"dtype", attr_data_type});
-
-    AttrValue attr_tensor;
-    Tensor tensor(DT_INT32, TensorShape({3}));
-    std::vector axis = {0, 2, 3};
-    for (int i = 0; static_cast(i) < axis.size(); i++) {
-      tensor.flat()(i) = axis[i];
-    }
-    tensor.AsProtoTensorContent(attr_tensor.mutable_tensor());
-    node->mutable_attr()->insert({"value", attr_tensor});
-    return node;
+    return GraphProcessor::AddNodeReductionConst(kReductionConst,
+                                                 default_device_);
   }
 
   // Expand all nodes which is in NHWC, but supports NCHW or is layout agnostic.
   Status Expand() {
     int node_size_original = graph_->node_size();
+    std::unordered_map> frames;
+    IdentifyFrames(*graph_, &frames);
+
     // This is the first pass where we expand the nodes which support NCHW.
     std::set ops_format_supported = GetOpsFormatSupported();
-    for (int i = 0; i < graph_->node_size(); i++) {
+    for (int i = 0; i < node_size_original; i++) {
       if (ops_format_supported.find(graph_->node(i).op()) !=
           ops_format_supported.end()) {
         auto node = graph_->mutable_node(i);
+        bool is_in_frame = !frames[node].empty();
         std::unique_ptr node_processor;
         if (node->op().compare("AvgPoolGrad") == 0) {
           node_processor.reset(
-              new AvgPoolGradProcessor(graph_, node, &node_map_));
+              new AvgPoolGradProcessor(graph_, node, node_map_, is_in_frame));
         } else if (node->op().compare("BiasAddGrad") == 0) {
           node_processor.reset(
-              new BiasAddGradProcessor(graph_, node, &node_map_));
+              new BiasAddGradProcessor(graph_, node, node_map_, is_in_frame));
         } else if (node->op().compare("Conv2D") == 0) {
-          node_processor.reset(
-              new Conv2DProcessor(graph_, node, &node_map_, config_.no_gemm));
+          node_processor.reset(new Conv2DProcessor(
+              graph_, node, node_map_, config_.no_gemm, is_in_frame));
         } else if (node->op().compare("Conv2DBackpropFilter") == 0) {
           node_processor.reset(new Conv2DBackpropFilterProcessor(
-              graph_, node, &node_map_, config_.no_gemm));
+              graph_, node, node_map_, config_.no_gemm, is_in_frame));
         } else if (node->op().compare("Conv2DBackpropInput") == 0) {
           node_processor.reset(new Conv2DBackpropInputProcessor(
-              graph_, node, &node_map_, config_.no_gemm));
+              graph_, node, node_map_, config_.no_gemm, is_in_frame));
         } else if (node->op().compare("FusedBatchNormGrad") == 0) {
-          node_processor.reset(
-              new FusedBatchNormGradProcessor(graph_, node, &node_map_));
+          node_processor.reset(new FusedBatchNormGradProcessor(
+              graph_, node, node_map_, is_in_frame));
         } else if (node->op().compare("MaxPoolGrad") == 0) {
           node_processor.reset(
-              new MaxPoolGradProcessor(graph_, node, &node_map_));
+              new MaxPoolGradProcessor(graph_, node, node_map_, is_in_frame));
         } else {
-          node_processor.reset(new NodeProcessor(graph_, node, &node_map_));
+          node_processor.reset(
+              new NodeProcessor(graph_, node, node_map_, is_in_frame));
         }
         TF_RETURN_IF_ERROR(node_processor->ConvertNode());
       }
@@ -1078,54 +1273,57 @@ class DataLayoutOptimizer {
     // only needs to be performed if at least one node in the previous pass is
     // expanded.
     if (graph_->node_size() > node_size_original) {
-      NodeDef* n = AddNodePermConst(kPermNHWCToNCHW, {0, 3, 1, 2});
-      n = AddNodePermConst(kPermNCHWToNHWC, {0, 2, 3, 1});
+      NodeDef* n = AddNodePermNHWCToNCHW();
+      n = AddNodePermNCHWToNHWC();
       n = AddNodeConcatConst();
-      n = AddGatherAxisConst();
+      n = AddNodeGatherAxisConst();
       n = AddNodeReductionConst();
       std::set ops_format_agnostic = GetOpsFormatAgnostic();
       for (int i = 0; i < graph_->node_size(); i++) {
         if (ops_format_agnostic.find(graph_->node(i).op()) !=
             ops_format_agnostic.end()) {
           auto node = graph_->mutable_node(i);
+          bool is_in_frame = !frames[node].empty();
           std::unique_ptr node_processor;
           if (node->op().compare("AddN") == 0) {
-            node_processor.reset(new AddNProcessor(graph_, node, &node_map_));
+            node_processor.reset(
+                new AddNProcessor(graph_, node, node_map_, is_in_frame));
           } else if (node->op().compare("Add") == 0 ||
                      node->op().compare("Mul") == 0 ||
                      node->op().compare("RealDiv") == 0 ||
                      node->op().compare("SquaredDifference") == 0 ||
                      node->op().compare("Sub") == 0) {
             node_processor.reset(
-                new BinaryOpProcessor(graph_, node, &node_map_));
+                new BinaryOpProcessor(graph_, node, node_map_, is_in_frame));
           } else if (node->op().compare("Concat") == 0 ||
                      node->op().compare("ConcatV2") == 0) {
-            node_processor.reset(new ConcatProcessor(graph_, node, &node_map_));
+            node_processor.reset(
+                new ConcatProcessor(graph_, node, node_map_, is_in_frame));
           } else if (node->op().compare("ReluGrad") == 0) {
             node_processor.reset(
-                new ReluGradProcessor(graph_, node, &node_map_));
+                new ReluGradProcessor(graph_, node, node_map_, is_in_frame));
           } else if (node->op().compare("Slice") == 0) {
-            auto input1 = node_map_.GetNode(NodeName(node->input(1)));
-            auto input2 = node_map_.GetNode(NodeName(node->input(2)));
+            auto input1 = node_map_->GetNode(NodeName(node->input(1)));
+            auto input2 = node_map_->GetNode(NodeName(node->input(2)));
             if (input1->op() == "ConcatOffset") {
-              node_processor.reset(
-                  new SliceProcessorConcatOffset(graph_, node, &node_map_));
+              node_processor.reset(new SliceProcessorConcatOffset(
+                  graph_, node, node_map_, is_in_frame));
             } else if (input1->op() == "Const" && input2->op() == "Const") {
-              node_processor.reset(
-                  new SliceProcessorConst(graph_, node, &node_map_));
+              node_processor.reset(new SliceProcessorConst(
+                  graph_, node, node_map_, is_in_frame));
             } else {
               node_processor.reset(
-                  new SliceProcessor(graph_, node, &node_map_));
+                  new SliceProcessor(graph_, node, node_map_, is_in_frame));
             }
-
           } else if (node->op().compare("Squeeze") == 0) {
             node_processor.reset(
-                new SqueezeProcessor(graph_, node, &node_map_));
+                new SqueezeProcessor(graph_, node, node_map_, is_in_frame));
           } else if (node->op().compare("Sum") == 0) {
-            node_processor.reset(new SumProcessor(graph_, node, &node_map_));
-          } else {
             node_processor.reset(
-                new AgnosticNodeProcessor(graph_, node, &node_map_));
+                new SumProcessor(graph_, node, node_map_, is_in_frame));
+          } else {
+            node_processor.reset(new AgnosticNodeProcessor(
+                graph_, node, node_map_, is_in_frame));
           }
           TF_RETURN_IF_ERROR(node_processor->ConvertNode());
         }
@@ -1145,12 +1343,12 @@ class DataLayoutOptimizer {
         if (IsNodeNCHWToNHWC(node->input(0))) {
           const string& trans_first = node->input(0);
           const string& trans_second = node->name();
-          auto outputs = node_map_.GetOutputs(trans_second);
+          auto outputs = node_map_->GetOutputs(trans_second);
           CHECK(outputs.size() == 1)
               << "There is always only a single output for a Transpose node, "
               << "due to the way it is added by NodeProcessor.";
           NodeDef* output = *outputs.begin();
-          string input = node_map_.GetNode(trans_first)->input(0);
+          string input = node_map_->GetNode(trans_first)->input(0);
           for (int i = 0; i < output->input_size(); i++) {
             if (output->input(i).compare(trans_second) == 0) {
               *output->mutable_input(i) = input;
@@ -1173,8 +1371,6 @@ class DataLayoutOptimizer {
   }
 
   string default_device_;
-  GraphDef* graph_;
-  NodeMap node_map_;
   TuningConfig config_;
 };
 
@@ -1231,8 +1427,9 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
       default_device = cluster->GetDevices().begin()->first;
     }
   }
+  std::unique_ptr node_map(new NodeMap(output));
   std::unique_ptr layout_optimizer(
-      new DataLayoutOptimizer(default_device, output, config));
+      new DataLayoutOptimizer(default_device, output, node_map.get(), config));
   status = layout_optimizer->Optimize();
   // This is based on an empirical observation that if the introduced Transpose
   // nodes is more than 30, not using GEMM implementation would result in better
@@ -1240,8 +1437,9 @@ Status LayoutOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
   if (status.ok() && GetNumTranspose(*output) > 30) {
     *output = new_item.graph;
     config.no_gemm = true;
-    layout_optimizer.reset(
-        new DataLayoutOptimizer(default_device, output, config));
+    node_map.reset(new NodeMap(output));
+    layout_optimizer.reset(new DataLayoutOptimizer(default_device, output,
+                                                   node_map.get(), config));
     status = layout_optimizer->Optimize();
   }
 
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 948df18879..9e15744fab 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -177,5 +177,13 @@ bool ExecuteWithTimeout(std::function fn, const int64 timeout_in_ms,
   return notified;
 }
 
+string AsControlDependency(const NodeDef& node) {
+  return strings::StrCat("^", node.name());
+}
+
+string AsControlDependency(const string& node) {
+  return strings::StrCat("^", node);
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index 4a8cb573d8..a9eccd685b 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -85,6 +85,14 @@ string AddPrefixToNodeName(const string& name, const string& prefix);
 bool ExecuteWithTimeout(std::function fn, int64 timeout_in_ms,
                         thread::ThreadPool* thread_pool);
 
+// Returns the node name prefixed with conventional symbol '^'
+// for control dependency, given a NodeDef.
+string AsControlDependency(const NodeDef& node);
+//
+// Returns the node name prefixed with conventional symbol '^'
+// for control dependency, given a node name
+string AsControlDependency(const string& node);
+
 }  // end namespace grappler
 }  // end namespace tensorflow
 
diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py
index 5dbaf76edb..bda9502cd1 100644
--- a/tensorflow/python/grappler/layout_optimizer_test.py
+++ b/tensorflow/python/grappler/layout_optimizer_test.py
@@ -22,8 +22,10 @@ from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
@@ -51,9 +53,7 @@ def max_pool_2x2(x):
 
 
 # Taken from tensorflow/examples/tutorials/mnist/mnist_deep.py
-def two_layer_model():
-  random_seed.set_random_seed(0)
-  x = random_ops.truncated_normal([1, 784], seed=0)
+def two_layer_model(x):
   x_image = array_ops.reshape(x, [-1, 28, 28, 1])
   w_conv1 = weight([5, 5, 1, 32])
   b_conv1 = bias([32])
@@ -66,24 +66,39 @@ def two_layer_model():
   return h_pool2
 
 
+def loop():
+  random_seed.set_random_seed(0)
+  x1 = random_ops.truncated_normal([1, 784], seed=0)
+  x2 = random_ops.truncated_normal([1, 784], seed=0)
+  x3 = random_ops.truncated_normal([1, 784], seed=0)
+  x4 = random_ops.truncated_normal([1, 784], seed=0)
+  elems = (x1, x2, x3, x4)
+  outputs = functional_ops.map_fn(two_layer_model, elems, dtype=dtypes.float32)
+  return outputs
+
+
+def get_config():
+  rewrite_options = rewriter_config_pb2.RewriterConfig(
+      optimize_tensor_layout=True)
+  graph_options = config_pb2.GraphOptions(
+      rewrite_options=rewrite_options, build_cost_model=1)
+  config = config_pb2.ConfigProto(graph_options=graph_options)
+  return config
+
+
 class LayoutOptimizerTest(test.TestCase):
   """Tests the Grappler layout optimizer."""
 
   def testTwoConvLayers(self):
     if test.is_gpu_available(cuda_only=True):
-      output = two_layer_model()
+      random_seed.set_random_seed(0)
+      x = random_ops.truncated_normal([1, 784], seed=0)
+      output = two_layer_model(x)
 
       with session.Session() as sess:
         output_val_ref = sess.run(output)
 
-      rewrite_options = rewriter_config_pb2.RewriterConfig(
-          optimize_tensor_layout=True)
-      graph_options = config_pb2.GraphOptions(
-          rewrite_options=rewrite_options,
-          build_cost_model=1)
-      config = config_pb2.ConfigProto(graph_options=graph_options)
-
-      with session.Session(config=config) as sess:
+      with session.Session(config=get_config()) as sess:
         metadata = config_pb2.RunMetadata()
         output_val = sess.run(output, run_metadata=metadata)
 
@@ -105,6 +120,19 @@ class LayoutOptimizerTest(test.TestCase):
 
       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
 
+  def testLoop(self):
+    if test.is_gpu_available(cuda_only=True):
+      output = loop()
+
+      with session.Session() as sess:
+        output_val_ref = sess.run(output)
+
+      with session.Session(config=get_config()) as sess:
+        metadata = config_pb2.RunMetadata()
+        output_val = sess.run(output, run_metadata=metadata)
+
+      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 2f5035640cf0fe60facb40dec30803a7bd17f7d5 Mon Sep 17 00:00:00 2001
From: Bart Kiers 
Date: Tue, 29 Aug 2017 11:25:47 +0200
Subject: [PATCH 055/294] Fixed code formatting

---
 tensorflow/contrib/makefile/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md
index 835d68489e..715eb51577 100644
--- a/tensorflow/contrib/makefile/README.md
+++ b/tensorflow/contrib/makefile/README.md
@@ -201,7 +201,8 @@ tensorflow/contrib/makefile/compile_ios_protobuf.sh
 
 Then, you will need to compile the nsync library for iOS:
 
-```export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
+```bash
+export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
 export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
 ```
 
-- 
GitLab


From 3c6a603d8ae9035830626df0e261442a59f2b990 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 07:44:14 -0700
Subject: [PATCH 056/294] Disable the 'output size too large' check for the CPU
 path of depthwise convolution and fix error message for the GPU and NEON
 path.

PiperOrigin-RevId: 166845550
---
 tensorflow/core/kernels/depthwise_conv_op.cc           | 9 +++++----
 tensorflow/core/kernels/neon/BUILD                     | 1 +
 tensorflow/core/kernels/neon/neon_depthwise_conv_op.cc | 9 +++++----
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index ccd33c0861..3c01546d8d 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -368,10 +368,11 @@ class DepthwiseConv2dNativeOp : public BinaryOp {
     TensorShape out_shape =
         ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth);
     OP_REQUIRES(
-        context, out_shape.num_elements() <= 2147483647,
-        errors::InvalidArgument("total number of outputs should be within the "
-                                "range of int which is used in the GPU kernel",
-                                in_depth, " vs ", filter.dim_size(2)));
+        context,
+        (!std::is_same::value ||
+         FastBoundsCheck(out_shape.num_elements(),
+                         std::numeric_limits::max())),
+        errors::InvalidArgument("Output elements too large for GPU kernel"));
 
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
diff --git a/tensorflow/core/kernels/neon/BUILD b/tensorflow/core/kernels/neon/BUILD
index e74310228a..536b2bdc03 100644
--- a/tensorflow/core/kernels/neon/BUILD
+++ b/tensorflow/core/kernels/neon/BUILD
@@ -37,6 +37,7 @@ tf_kernel_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:nn_ops_op_lib",
+        "//tensorflow/core/kernels:bounds_check",
         "//tensorflow/core/kernels:ops_util",
         "@gemmlowp//:gemmlowp",
     ],
diff --git a/tensorflow/core/kernels/neon/neon_depthwise_conv_op.cc b/tensorflow/core/kernels/neon/neon_depthwise_conv_op.cc
index 818b44aab3..17f2af550f 100644
--- a/tensorflow/core/kernels/neon/neon_depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/neon/neon_depthwise_conv_op.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
 #include "tensorflow/core/kernels/neon/depthwiseconv_float.h"
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/status.h"
@@ -95,10 +96,10 @@ class NeonDepthwiseConv2dNativeOp : public BinaryOp {
                                          padding_, &out_cols, &pad_cols));
     TensorShape out_shape({batch, out_rows, out_cols, out_depth});
     OP_REQUIRES(
-        context, out_shape.num_elements() <= 2147483647,
-        errors::InvalidArgument("total number of outputs should be within the "
-                                "range of int which is used in the GPU kernel",
-                                in_depth, " vs ", filter.dim_size(2)));
+        context,
+        FastBoundsCheck(out_shape.num_elements(),
+                        std::numeric_limits::max()),
+        errors::InvalidArgument("Output elements too large for NEON kernel"));
 
     // Output tensor is of the following dimensions:
     // [ in_batch, out_rows, out_cols, out_depth ]
-- 
GitLab


From 0fd2a74120b86972441378f79fb5d03e86fed856 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne 
Date: Tue, 29 Aug 2017 08:24:23 -0700
Subject: [PATCH 057/294] Introduce C++ API while loop builder method

This change adds a new function, BuildWhileLoop(), that constructs a
while loop. BuildWhileLoop() takes functions that build the cond and
body graphs, similar to the Python while_loop function. It also
switches the C API to use this new function in order to reduce code
duplication. This is in preparation for while loop gradients, which
are also implemented in the C++ API (along with the other gradient
code).

I didn't write unit tests for BuildWhileLoop, instead relying on the
current C API while loop tests.

This change also disables while loop creation on Android to avoid
pulling in extra C++ dependencies.

PiperOrigin-RevId: 166849829
---
 tensorflow/c/BUILD                       |   1 +
 tensorflow/c/c_api.cc                    | 363 +++++++++--------------
 tensorflow/cc/BUILD                      |  20 ++
 tensorflow/cc/framework/scope.h          |  12 +-
 tensorflow/cc/framework/scope_internal.h |   5 +
 tensorflow/cc/ops/while_loop.cc          | 223 ++++++++++++++
 tensorflow/cc/ops/while_loop.h           |  64 ++++
 tensorflow/contrib/cmake/tf_c.cmake      |  13 +-
 tensorflow/contrib/cmake/tf_cc_ops.cmake |  10 +
 tensorflow/contrib/cmake/tf_python.cmake |   2 +
 10 files changed, 489 insertions(+), 224 deletions(-)
 create mode 100644 tensorflow/cc/ops/while_loop.cc
 create mode 100644 tensorflow/cc/ops/while_loop.h

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index df2ae229db..604dfab148 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -61,6 +61,7 @@ tf_cuda_library(
             "//tensorflow/cc:ops",
             "//tensorflow/cc:grad_ops",
             "//tensorflow/cc:scope_internal",
+            "//tensorflow/cc:while_loop",
             "//tensorflow/core:core_cpu",
             "//tensorflow/core:core_cpu_internal",
             "//tensorflow/core:framework",
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 1ea70f0598..07c8277a6f 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/cc/framework/gradients.h"
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/framework/scope_internal.h"
+#include "tensorflow/cc/ops/while_loop.h"
 #include "tensorflow/cc/saved_model/loader.h"
 #endif
 #include "tensorflow/c/c_api_internal.h"
@@ -831,6 +832,30 @@ const tensorflow::AttrValue* GetAttrValue(TF_Operation* oper,
   return attr;
 }
 
+TensorId ToTensorId(const TF_Output& output) {
+  return TensorId(output.oper->node.name(), output.index);
+}
+
+#ifndef __ANDROID__
+std::vector OutputsFromTFOutputs(TF_Output* tf_outputs,
+                                                     int n) {
+  std::vector outputs(n);
+  for (int i = 0; i < n; ++i) {
+    outputs[i] =
+        tensorflow::Output(&tf_outputs[i].oper->node, tf_outputs[i].index);
+  }
+  return outputs;
+}
+
+void TFOutputsFromOutputs(const std::vector& outputs,
+                          TF_Output* tf_outputs) {
+  for (int i = 0; i < outputs.size(); i++) {
+    tf_outputs[i].oper = ToOperation(outputs[i].node());
+    tf_outputs[i].index = outputs[i].index();
+  }
+}
+#endif  // __ANDROID__
+
 }  // namespace
 
 // Shape functions -----------------------------------------------------------
@@ -1721,14 +1746,6 @@ void TF_ImportGraphDefOptionsSetPrefix(TF_ImportGraphDefOptions* opts,
   opts->opts.prefix = prefix;
 }
 
-namespace {
-
-TensorId ToTensorId(const TF_Output& output) {
-  return TensorId(output.oper->node.name(), output.index);
-}
-
-}  // namespace
-
 void TF_ImportGraphDefOptionsAddInputMapping(TF_ImportGraphDefOptions* opts,
                                              const char* src_name,
                                              int src_index, TF_Output dst) {
@@ -1812,6 +1829,11 @@ void TF_GraphImportGraphDef(TF_Graph* graph, const TF_Buffer* graph_def,
 // While loop functions -------------------------------------------------------
 
 namespace {
+
+#ifndef __ANDROID__
+
+// Creates a placeholder representing an input to the cond or body graph.
+// TODO(skyewm): remove these from final graph
 bool CreateInput(const TF_Output& parent_input, TF_Graph* g, const char* name,
                  TF_Output* input, TF_Status* status) {
   TF_OperationDescription* desc = TF_NewOperation(g, "Placeholder", name);
@@ -1823,130 +1845,50 @@ bool CreateInput(const TF_Output& parent_input, TF_Graph* g, const char* name,
   return true;
 }
 
-bool CreateEnter(TF_Graph* g, const char* node_name, const char* frame_name,
-                 const TF_Output& input, TF_Output* enter, TF_Status* status)
-    EXCLUSIVE_LOCKS_REQUIRED(g->mu) {
-  TF_OperationDescription* desc = TF_NewOperationLocked(g, "Enter", node_name);
-  TF_AddInput(desc, input);
-  TF_SetAttrString(desc, "frame_name", frame_name, strlen(frame_name));
-  TF_Operation* oper = TF_FinishOperationLocked(desc, status);
-  if (!status->status.ok()) return false;
-  *enter = {oper, 0};
-  return true;
-}
-
-bool CreateMerge(TF_Graph* g, const char* name, const TF_Output& input,
-                 const char* backedge_name, int backedge_index,
-                 TF_Output* merge, TF_Status* status)
-    EXCLUSIVE_LOCKS_REQUIRED(g->mu) {
-  TF_OperationDescription* desc = TF_NewOperationLocked(g, "Merge", name);
-
-  // The merge nodes accept the while loop's back edges as an input. Use the
-  // underlying NodeBuilder API directly to create an input to the
-  // not-yet-created back edge.
-  std::vector input_list;
-  input_list.push_back(NodeBuilder::NodeOut(&input.oper->node, input.index));
-  // All merge inputs must have same type
-  DataType type = input.oper->node.output_type(input.index);
-  input_list.push_back(
-      NodeBuilder::NodeOut(backedge_name, backedge_index, type));
-
-  desc->node_builder.Input(input_list);
-
-  TF_Operation* oper = TF_FinishOperationLocked(desc, status);
-  if (!status->status.ok()) return false;
-  *merge = {oper, 0};
-  return true;
-}
-
-bool CreateSwitch(TF_Graph* g, const char* name, const TF_Output& input,
-                  const TF_Output& predicate, TF_Output* switch_true,
-                  TF_Output* switch_false, TF_Status* status)
-    EXCLUSIVE_LOCKS_REQUIRED(g->mu) {
-  TF_OperationDescription* desc = TF_NewOperationLocked(g, "Switch", name);
-  TF_AddInput(desc, input);
-  TF_AddInput(desc, predicate);
-  TF_Operation* oper = TF_FinishOperationLocked(desc, status);
-  if (!status->status.ok()) return false;
-  *switch_false = {oper, 0};
-  *switch_true = {oper, 1};
-  return true;
-}
-
-bool CreateNext(TF_Graph* g, const char* name, const TF_Output& input,
-                TF_Output* next, TF_Status* status)
-    EXCLUSIVE_LOCKS_REQUIRED(g->mu) {
-  TF_OperationDescription* desc =
-      TF_NewOperationLocked(g, "NextIteration", name);
-  TF_AddInput(desc, input);
-  TF_Operation* oper = TF_FinishOperationLocked(desc, status);
-  if (!status->status.ok()) return false;
-  *next = {oper, 0};
-  return true;
-}
-
-bool CreateExit(TF_Graph* g, const char* name, const TF_Output& input,
-                TF_Output* exit, TF_Status* status)
-    EXCLUSIVE_LOCKS_REQUIRED(g->mu) {
-  TF_OperationDescription* desc = TF_NewOperationLocked(g, "Exit", name);
-  TF_AddInput(desc, input);
-  TF_Operation* oper = TF_FinishOperationLocked(desc, status);
-  if (!status->status.ok()) return false;
-  *exit = {oper, 0};
-  return true;
-}
-
-class ScopedImportGraphDefOptions {
- public:
-  ScopedImportGraphDefOptions() { opts_ = TF_NewImportGraphDefOptions(); }
-  ~ScopedImportGraphDefOptions() { TF_DeleteImportGraphDefOptions(opts_); }
-
-  TF_ImportGraphDefOptions* get() const { return opts_; }
-
- private:
-  TF_ImportGraphDefOptions* opts_;
-
-  TF_DISALLOW_COPY_AND_ASSIGN(ScopedImportGraphDefOptions);
-};
-
 // Copies `src_graph` into `dst_graph`. Any node in `src_graph` with input
-// `src_inputs[i]` will have that input replaced with `dst_inputs[i]`.
-// `prefix` will be prepended to copied node names. `return_nodes` are nodes
-// in `src_graph`, and the new corresponding nodes in `dst_graph` will be
-// returned. `return_nodes` should be preallocated to size `nreturn_nodes`.
-bool CopyGraph(TF_Graph* src_graph, TF_Graph* dst_graph,
-               const TF_Output* src_inputs,
-               const std::vector& dst_inputs, const char* prefix,
-               const TF_Output* nodes_to_return, int nreturn_nodes,
-               TF_Output* return_nodes, TF_Status* s)
-    EXCLUSIVE_LOCKS_REQUIRED(dst_graph->mu) {
+// `src_inputs[i]` will have that input replaced with `dst_inputs[i]`.  `prefix`
+// will be prepended to copied node names. `control_deps` are nodes in
+// `dst_graph` that the copied `src_graph` nodes will have control dependencies
+// on. `return_nodes` are nodes in `src_graph`, and the new corresponding nodes
+// in `dst_graph` will be returned. `return_nodes` must be non-null.
+Status CopyGraph(Graph* src_graph, Graph* dst_graph,
+                 tensorflow::ShapeRefiner* dst_refiner,
+                 const TF_Output* src_inputs,
+                 const std::vector& dst_inputs,
+                 const tensorflow::string& prefix,
+                 const std::vector& control_deps,
+                 const TF_Output* nodes_to_return, int nreturn_nodes,
+                 std::vector* return_nodes) {
+  DCHECK(return_nodes != nullptr);
   GraphDef gdef;
-  src_graph->graph.ToGraphDef(&gdef);
+  src_graph->ToGraphDef(&gdef);
 
-  ScopedImportGraphDefOptions opts;
-  TF_ImportGraphDefOptionsSetPrefix(opts.get(), prefix);
+  tensorflow::ImportGraphDefOptions opts;
+  opts.prefix = prefix;
 
   for (int i = 0; i < dst_inputs.size(); ++i) {
-    TensorId src = ToTensorId(src_inputs[i]);
-    TF_ImportGraphDefOptionsAddInputMapping(opts.get(), src.first.data(),
-                                            src.second, dst_inputs[i]);
+    opts.input_map[ToTensorId(src_inputs[i])] =
+        TensorId(dst_inputs[i].node()->name(), dst_inputs[i].index());
   }
-  opts.get()->opts.skip_mapped_nodes = true;
+  opts.skip_mapped_nodes = true;
 
-  // We use the pivot node to control constants in `src_graph`
-  TF_Operation* pivot = dst_inputs[0].oper;
-  TF_ImportGraphDefOptionsAddControlDependency(opts.get(), pivot);
+  for (const tensorflow::Operation& op : control_deps) {
+    opts.control_dependencies.push_back(op.node()->name());
+  }
 
   for (int i = 0; i < nreturn_nodes; ++i) {
-    TF_ImportGraphDefOptionsAddReturnOutput(
-        opts.get(), nodes_to_return[i].oper->node.name().c_str(),
-        nodes_to_return[i].index);
+    opts.return_tensors.push_back(ToTensorId(nodes_to_return[i]));
   }
 
-  GraphImportGraphDefLocked(dst_graph, gdef, opts.get(), return_nodes,
-                            nreturn_nodes, s);
-  if (TF_GetCode(s) != TF_OK) return false;
-  return true;
+  // TOOD(skyewm): change to OutputTensor
+  std::vector> return_tensors;
+  TF_RETURN_IF_ERROR(
+      ImportGraphDef(opts, gdef, dst_graph, dst_refiner, &return_tensors));
+
+  for (const auto& pair : return_tensors) {
+    return_nodes->emplace_back(pair.first, pair.second);
+  }
+  return Status::OK();
 }
 
 bool ValidateConstWhileParams(const TF_WhileParams& params, TF_Status* s) {
@@ -1982,6 +1924,8 @@ bool ValidateInputWhileParams(const TF_WhileParams& params, TF_Status* s) {
   return true;
 }
 
+#endif  // __ANDROID__
+
 void FreeWhileResources(const TF_WhileParams* params) {
   TF_DeleteGraph(params->cond_graph);
   TF_DeleteGraph(params->body_graph);
@@ -1999,6 +1943,13 @@ TF_WhileParams EmptyWhileParams() {
 
 TF_WhileParams TF_NewWhile(TF_Graph* g, TF_Output* inputs, int ninputs,
                            TF_Status* status) {
+#ifdef __ANDROID__
+  status->status = tensorflow::errors::Unimplemented(
+      "Creating while loops is not supported in Android. File a bug at "
+      "https://github.com/tensorflow/tensorflow/issues if this feature is "
+      "important to you");
+  return EmptyWhileParams();
+#else
   if (ninputs == 0) {
     status->status =
         InvalidArgument("TF_NewWhile() must be passed at least one input");
@@ -2039,8 +1990,10 @@ TF_WhileParams TF_NewWhile(TF_Graph* g, TF_Output* inputs, int ninputs,
     return EmptyWhileParams();
   }
   return params;
+#endif  // __ANDROID__
 }
 
+#ifndef __ANDROID__
 namespace {
 
 // TODO(skyewm): make nodes in while loop unfetchable like in Python version
@@ -2050,113 +2003,90 @@ void TF_FinishWhileHelper(const TF_WhileParams* params, TF_Status* status,
 
   TF_Graph* parent = params->cond_graph->parent;
   TF_Output* parent_inputs = params->cond_graph->parent_inputs;
-  int n = params->ninputs;
+  int num_loop_vars = params->ninputs;
 
   mutex_lock l(parent->mu);
 
-  // Create Enter nodes
-  std::vector enter_nodes(n);
-  for (int i = 0; i < n; ++i) {
-    if (!CreateEnter(parent, StrCat(params->name, "/enter", i).c_str(),
-                     params->name, parent_inputs[i], &enter_nodes[i], status)) {
-      return;
-    }
-  }
-
-  // Create Merge nodes
-  std::vector merge_nodes(n);
-  for (int i = 0; i < n; ++i) {
-    if (!CreateMerge(parent, StrCat(params->name, "/merge", i).c_str(),
-                     enter_nodes[i], StrCat(params->name, "/next", i).c_str(),
-                     0, &merge_nodes[i], status)) {
-      return;
-    }
-  }
-
-  // Copy cond_graph to parent and replace input placeholders with merge node
-  // outputs, and get handle to new cond output
-  tensorflow::string cond_prefix = StrCat(params->name, "/cond");
-  TF_Output cond_output;
-  if (!CopyGraph(params->cond_graph, parent, params->cond_inputs, merge_nodes,
-                 cond_prefix.c_str(), ¶ms->cond_output, 1, &cond_output,
-                 status)) {
-    return;
-  }
-
-  // Create Switch nodes
-  std::vector switch_trues(n);
-  std::vector switch_falses(n);
-  for (int i = 0; i < n; ++i) {
-    if (!CreateSwitch(parent, StrCat(params->name, "/switch", i).c_str(),
-                      merge_nodes[i], cond_output, &switch_trues[i],
-                      &switch_falses[i], status)) {
-      return;
-    }
-  }
-
-  // Copy body_graph to parent, replace input placeholders with switch node
-  // true outputs, and get handles to new body outputs
-  tensorflow::string body_prefix = StrCat(params->name, "/body");
-  std::vector body_outputs(n);
-  if (!CopyGraph(params->body_graph, parent, params->body_inputs, switch_trues,
-                 body_prefix.c_str(), params->body_outputs, n,
-                 body_outputs.data(), status)) {
-    return;
-  }
-
-  // Create Next nodes
-  std::vector next_nodes(n);
-  for (int i = 0; i < n; ++i) {
-    if (!CreateNext(parent, StrCat(params->name, "/next", i).c_str(),
-                    body_outputs[i], &next_nodes[i], status)) {
-      return;
-    }
-  }
-
-  // Create Exit nodes (which are the outputs of the while loop)
-  for (int i = 0; i < n; ++i) {
-    if (!CreateExit(parent, StrCat(params->name, "/exit", i).c_str(),
-                    switch_falses[i], &outputs[i], status)) {
-      return;
-    }
+  // 'cond_fn' copies the cond graph into the parent graph.
+  tensorflow::ops::CondGraphBuilderFn cond_fn =
+      [params, parent](const tensorflow::Scope& scope,
+                       const std::vector& inputs,
+                       tensorflow::Output* output) {
+        DCHECK_EQ(scope.graph(), &parent->graph);
+        std::vector cond_output;
+        TF_RETURN_IF_ERROR(CopyGraph(
+            ¶ms->cond_graph->graph, &parent->graph, &parent->refiner,
+            params->cond_inputs, inputs, scope.impl()->name(),
+            scope.impl()->control_deps(), ¶ms->cond_output,
+            /* nreturn_nodes */ 1, &cond_output));
+        *output = cond_output[0];
+        return Status::OK();
+      };
+
+  // 'body_fn' copies the body graph into the parent graph.
+  tensorflow::ops::BodyGraphBuilderFn body_fn =
+      [params, parent, num_loop_vars](
+          const tensorflow::Scope& scope,
+          const std::vector& inputs,
+          std::vector* outputs) {
+        DCHECK_EQ(scope.graph(), &parent->graph);
+        TF_RETURN_IF_ERROR(
+            CopyGraph(¶ms->body_graph->graph, &parent->graph,
+                      &parent->refiner, params->body_inputs, inputs,
+                      scope.impl()->name(), scope.impl()->control_deps(),
+                      params->body_outputs, num_loop_vars, outputs));
+        return Status::OK();
+      };
+
+  // Create the while loop using an internal scope.
+  tensorflow::Scope scope =
+      NewInternalScope(&parent->graph, &status->status, &parent->refiner)
+          .NewSubScope(params->name);
+
+  const int first_new_node_id = parent->graph.num_node_ids();
+
+  tensorflow::OutputList loop_outputs;
+  status->status = tensorflow::ops::BuildWhileLoop(
+      scope, OutputsFromTFOutputs(parent_inputs, num_loop_vars), cond_fn,
+      body_fn, params->name, &loop_outputs);
+
+  // Update name_map with newly-created ops.
+  // TODO(skyewm): right now BuildWhileLoop() may alter the graph if it returns
+  // a bad status. Once we fix this, we may want to return early instead of
+  // executing the following code.
+  for (int i = first_new_node_id; i < parent->graph.num_node_ids(); ++i) {
+    Node* new_node = parent->graph.FindNodeId(i);
+    if (new_node == nullptr) continue;
+    parent->name_map[new_node->name()] = new_node;
+  }
+
+  // Populate 'outputs'.
+  DCHECK_LE(loop_outputs.size(), num_loop_vars);
+  for (int i = 0; i < loop_outputs.size(); ++i) {
+    outputs[i] = {ToOperation(loop_outputs[i].node()), loop_outputs[i].index()};
   }
 }
 
 }  // namespace
+#endif  // __ANDROID__
 
 void TF_FinishWhile(const TF_WhileParams* params, TF_Status* status,
                     TF_Output* outputs) {
+#ifdef __ANDROID__
+  status->status = tensorflow::errors::Unimplemented(
+      "Creating while loops is not supported in Android. File a bug at "
+      "https://github.com/tensorflow/tensorflow/issues if this feature is "
+      "important to you");
+#else
   // If it appears the caller created or modified `params`, don't free resources
   if (!ValidateConstWhileParams(*params, status)) return;
   TF_FinishWhileHelper(params, status, outputs);
   FreeWhileResources(params);
+#endif  // __ANDROID__
 }
 
 void TF_AbortWhile(const TF_WhileParams* params) { FreeWhileResources(params); }
 
-#ifndef __ANDROID__
-namespace {
-
-void OutputsFromTFOutputs(TF_Output* tf_outputs, int n, TF_Status* status,
-                          std::vector* outputs) {
-  outputs->resize(n);
-  for (int i = 0; i < n; i++) {
-    const TF_Output& tf_output = tf_outputs[i];
-    (*outputs)[i] = tensorflow::Output(&tf_output.oper->node, tf_output.index);
-  }
-}
-
-void TFOutputsFromOutputs(const std::vector& outputs,
-                          TF_Output* tf_outputs) {
-  for (int i = 0; i < outputs.size(); i++) {
-    tf_outputs[i].oper = ToOperation(outputs[i].node());
-    tf_outputs[i].index = outputs[i].index();
-  }
-}
-
-}  // namespace
-#endif  // __ANDROID__
-
 void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny, TF_Output* x, int nx,
                      TF_Output* dx, TF_Status* status, TF_Output* dy) {
 #ifdef __ANDROID__
@@ -2165,25 +2095,22 @@ void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny, TF_Output* x, int nx,
       "https://github.com/tensorflow/tensorflow/issues if this feature is "
       "important to you");
 #else
-  std::vector y_arg;
-  std::vector x_arg;
+  std::vector y_arg = OutputsFromTFOutputs(y, ny);
+  std::vector x_arg = OutputsFromTFOutputs(x, nx);
   std::vector dy_arg;
-  OutputsFromTFOutputs(y, ny, status, &y_arg);
-  OutputsFromTFOutputs(x, nx, status, &x_arg);
 
   {
     // We need to hold on to the lock while we have a scope that uses TF_Graph.
     mutex_lock graph_lock(g->mu);
 
-    const int max_node_id_before = g->graph.num_node_ids();
+    const int first_new_node_id = g->graph.num_node_ids();
 
     tensorflow::Scope scope =
         NewInternalScope(&g->graph, &status->status, &g->refiner)
             .NewSubScope("gradients");
 
     if (dx != nullptr) {
-      std::vector dx_arg;
-      OutputsFromTFOutputs(dx, ny, status, &dx_arg);
+      std::vector dx_arg = OutputsFromTFOutputs(dx, ny);
       status->status =
           AddSymbolicGradients(scope, y_arg, x_arg, dx_arg, &dy_arg);
     } else {
@@ -2192,7 +2119,7 @@ void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny, TF_Output* x, int nx,
 
     // Update g->name_map with the name_map from the scope, which will contain
     // the new gradient ops.
-    for (int i = max_node_id_before; i < g->graph.num_node_ids(); ++i) {
+    for (int i = first_new_node_id; i < g->graph.num_node_ids(); ++i) {
       Node* n = g->graph.FindNodeId(i);
       if (n == nullptr) continue;
       g->name_map[n->name()] = n;
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index aa01767a41..d9071ba6e4 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -228,6 +228,26 @@ tf_cc_test(
     ],
 )
 
+cc_library_with_android_deps(
+    name = "while_loop",
+    srcs = ["ops/while_loop.cc"],
+    hdrs = ["ops/while_loop.h"],
+    android_deps = [
+        "//tensorflow/core:android_tensorflow_lib",
+    ],
+    common_deps = [
+        ":cc_ops",
+        ":cc_ops_internal",
+        ":ops",
+        ":scope",
+        ":scope_internal",
+    ],
+    deps = [
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+    ],
+)
+
 cc_library(
     name = "grad_op_registry",
     srcs = ["framework/grad_op_registry.cc"],
diff --git a/tensorflow/cc/framework/scope.h b/tensorflow/cc/framework/scope.h
index 5cae5c64ad..0335f6357d 100644
--- a/tensorflow/cc/framework/scope.h
+++ b/tensorflow/cc/framework/scope.h
@@ -167,7 +167,8 @@ class Scope {
 
   // START_SKIP_DOXYGEN
 
-  /// Update the builder with properties accumulated in this scope.
+  /// Update the builder with properties accumulated in this scope. Does not set
+  /// status().
   // TODO(skyewm): NodeBuilder is not part of public API
   void UpdateBuilder(NodeBuilder* builder) const;
   // END_SKIP_DOXYGEN
@@ -215,12 +216,15 @@ class Scope {
 
   const std::vector& control_deps() const;
 
- private:
-  friend class InternalScope;
+  // START_SKIP_DOXYGEN
   class Impl;
-  std::unique_ptr impl_;
   Impl* impl() { return impl_.get(); }
   const Impl* impl() const { return impl_.get(); }
+  // END_SKIP_DOXYGEN
+
+ private:
+  friend class InternalScope;
+  std::unique_ptr impl_;
   explicit Scope(Impl*);
 };
 
diff --git a/tensorflow/cc/framework/scope_internal.h b/tensorflow/cc/framework/scope_internal.h
index e2cc22af5d..968c366550 100644
--- a/tensorflow/cc/framework/scope_internal.h
+++ b/tensorflow/cc/framework/scope_internal.h
@@ -43,6 +43,9 @@ class Scope::Impl {
        const std::shared_ptr& name_map,
        const std::shared_ptr& refiner);
 
+  const string& name() const { return name_; }
+  const std::vector& control_deps() const { return control_deps_; }
+
  private:
   friend class Scope;
 
@@ -98,6 +101,8 @@ class Scope::Impl {
 
   const std::vector control_deps_;
 
+  // The fully-qualified name of this scope (i.e. includes any parent scope
+  // names).
   const string name_ = "";
   const string op_name_ = "";
   const bool exit_on_error_ = false;
diff --git a/tensorflow/cc/ops/while_loop.cc b/tensorflow/cc/ops/while_loop.cc
new file mode 100644
index 0000000000..27da77bbe0
--- /dev/null
+++ b/tensorflow/cc/ops/while_loop.cc
@@ -0,0 +1,223 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/ops/while_loop.h"
+
+#include "tensorflow/cc/framework/scope_internal.h"
+#include "tensorflow/cc/ops/control_flow_ops_internal.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/common_runtime/shape_refiner.h"
+#include "tensorflow/core/graph/node_builder.h"
+
+namespace tensorflow {
+namespace ops {
+
+namespace {
+
+// Utility function for converting to internal C++ datatypes.
+OutputTensor ToOutputTensor(const Output& output) {
+  return OutputTensor(output.node(), output.index());
+}
+
+// Utility function for converting to internal C++ datatypes.
+std::vector ToOutputTensors(const std::vector& outputs) {
+  std::vector result(outputs.size());
+  for (int i = 0; i < outputs.size(); ++i) {
+    result[i] = ToOutputTensor(outputs[i]);
+  }
+  return result;
+}
+
+// Utility function for converting to internal C++ datatypes.
+std::vector ToNodes(const std::vector& outputs) {
+  std::vector result(outputs.size());
+  for (int i = 0; i < outputs.size(); ++i) {
+    result[i] = outputs[i].node();
+  }
+  return result;
+}
+
+// Manually generates the name of the `loop_var_idx`-th NextIteration node of a
+// loop being constructed with `scope`. This is used to define the backedge
+// before the NextIteration node is created.
+string NextIterationName(const Scope& scope, int loop_var_idx) {
+  string result;
+  const string& prefix = scope.impl()->name();
+  if (!prefix.empty()) strings::StrAppend(&result, prefix, "/");
+  strings::StrAppend(&result, "NextIteration");
+  if (loop_var_idx > 0) strings::StrAppend(&result, "_", loop_var_idx);
+  return result;
+}
+
+// Creates the `loop_var_idx`-th Merge node of a loop being constructed with
+// `scope`. `enter_output` is the `loop_var_idx`-th Enter node's output.
+Status CreateMerge(const Scope& scope, int loop_var_idx,
+                   const Output& enter_output, Output* merge_output) {
+  // The merge nodes accept the while loop's back edges as an input (i.e. the
+  // not-yet-created next iteration nodes). Use the underlying NodeBuilder API
+  // directly to create the back edge.
+  NodeBuilder::NodeOut enter_input(enter_output.node(), enter_output.index());
+
+  const int next_output_index = 0;
+  DataType dtype = enter_output.node()->output_type(0);
+  NodeBuilder::NodeOut next_input(NextIterationName(scope, loop_var_idx),
+                                  next_output_index, dtype);
+
+  std::vector input_list({enter_input, next_input});
+  const string unique_name = scope.GetUniqueNameForOp("Merge");
+  NodeBuilder builder = NodeBuilder(unique_name, "Merge").Input(input_list);
+  scope.UpdateBuilder(&builder);
+
+  Node* merge_node;
+  TF_RETURN_IF_ERROR(builder.Finalize(scope.graph(), &merge_node));
+  TF_RETURN_IF_ERROR(scope.DoShapeInference(merge_node));
+  *merge_output = Output(merge_node, 0);
+  return Status::OK();
+}
+
+// Creates the condition subgraph defined by `cond`.
+Status CreateCond(const Scope& scope, const CondGraphBuilderFn& cond,
+                  const std::vector& inputs, Output* output) {
+  // The control dependency is for constants in the cond graph, and other ops
+  // that do not depend on the loop variables. This ensures that these ops are
+  // in the while loop frame (since they will indirectly depend on an Enter node
+  // defining the frame) and that they are executed once per loop iteration.
+  //
+  // TODO(skyewm): the control dep will be added to all nodes in the cond graph.
+  // This is at best unnecessary, and at worst may prevent different parts of
+  // different loop iterations from executing in parallel.
+  Scope cond_scope =
+      scope.NewSubScope("cond").WithControlDependencies(inputs[0]);
+  Output raw_cond_out;
+  TF_RETURN_IF_ERROR(cond(cond_scope, inputs, &raw_cond_out));
+  if (raw_cond_out.type() != DT_BOOL) {
+    return errors::InvalidArgument(
+        "BuildWhileLoop: 'cond' argument must return a boolean output, got ",
+        DataTypeString(raw_cond_out.type()));
+  }
+  *output = LoopCond(scope, raw_cond_out).output;
+  return Status::OK();
+}
+
+// Create the bdoy subgraph defined by `body`. `outputs` must be non-null and
+// empty.
+Status CreateBody(const Scope& scope, const BodyGraphBuilderFn& body,
+                  const std::vector& inputs,
+                  std::vector* outputs) {
+  DCHECK(outputs != nullptr);
+  DCHECK(outputs->empty());
+
+  // The control dependency is analogous to that in CreateCond().
+  Scope body_scope =
+      scope.NewSubScope("body").WithControlDependencies(inputs[0]);
+  TF_RETURN_IF_ERROR(body(body_scope, inputs, outputs));
+  const size_t num_loop_vars = inputs.size();
+  if (outputs->size() != num_loop_vars) {
+    return errors::InvalidArgument(
+        "BuildWhileLoop: 'body' argument expected to return ", num_loop_vars,
+        "outputs, got ", outputs->size());
+  }
+  // TODO(skyewm): check output types/shapes
+  return Status::OK();
+}
+
+}  // namespace
+
+// A while loop with a single loop variable looks like this:
+//
+// (output)
+//     ^    +---------------+
+//     |    | body subgraph +-------------+
+//    Exit  +---------------+             |
+//      ^    ^                            |
+//      |    |                            |
+//      Switch<--------+                  v
+//        ^            |             NextIteration
+//        |     +------+--------+         |
+//        +---->| cond subgraph |         |
+//        |     +---------------+         |
+//       Merge<---------------------------+
+//       ^
+//       |
+//    Enter
+//      ^
+//      |
+//   (input)
+//
+// If there are multiple loop variables, each of the control flow ops is
+// duplicated for each loop variable.
+// TODO(skyewm): link to public version of design doc
+Status BuildWhileLoop(const Scope& scope, const std::vector& inputs,
+                      const CondGraphBuilderFn& cond,
+                      const BodyGraphBuilderFn& body, const string& frame_name,
+                      OutputList* outputs) {
+  DCHECK(!inputs.empty());
+  DCHECK(outputs != nullptr);
+  DCHECK(outputs->empty());
+
+  TF_RETURN_IF_ERROR(scope.status());
+  const size_t num_loop_vars = inputs.size();
+
+  std::vector enter_outputs(num_loop_vars);
+  for (int i = 0; i < num_loop_vars; ++i) {
+    enter_outputs[i] = internal::Enter(scope, inputs[i], frame_name);
+  }
+  TF_RETURN_IF_ERROR(scope.status());
+
+  std::vector merge_outputs(num_loop_vars);
+  for (int i = 0; i < num_loop_vars; ++i) {
+    TF_RETURN_IF_ERROR(
+        CreateMerge(scope, i, enter_outputs[i], &merge_outputs[i]));
+  }
+
+  Output cond_out;
+  TF_RETURN_IF_ERROR(CreateCond(scope, cond, merge_outputs, &cond_out));
+
+  std::vector switch_trues(num_loop_vars);
+  std::vector switch_falses(num_loop_vars);
+  for (int i = 0; i < num_loop_vars; ++i) {
+    auto switch_i = Switch(scope, merge_outputs[i], cond_out);
+    switch_trues[i] = switch_i.output_true;
+    switch_falses[i] = switch_i.output_false;
+  }
+  TF_RETURN_IF_ERROR(scope.status());
+
+  std::vector body_outputs;
+  TF_RETURN_IF_ERROR(CreateBody(scope, body, switch_trues, &body_outputs));
+
+  std::vector next_outputs(num_loop_vars);
+  for (int i = 0; i < num_loop_vars; ++i) {
+    next_outputs[i] = NextIteration(scope, body_outputs[i]);
+    DCHECK_EQ(next_outputs[i].node()->name(), NextIterationName(scope, i));
+  }
+  TF_RETURN_IF_ERROR(scope.status());
+
+  // Create the backedges from the NextIteration nodes to the Merge nodes.
+  for (int i = 0; i < num_loop_vars; ++i) {
+    const int merge_backedge_output_index = 1;
+    scope.graph()->AddEdge(next_outputs[i].node(), next_outputs[i].index(),
+                           merge_outputs[i].node(),
+                           merge_backedge_output_index);
+  }
+
+  outputs->resize(num_loop_vars);
+  for (int i = 0; i < num_loop_vars; ++i) {
+    (*outputs)[i] = internal::Exit(scope, switch_falses[i]);
+  }
+  return scope.status();
+}
+
+}  // namespace ops
+}  // namespace tensorflow
diff --git a/tensorflow/cc/ops/while_loop.h b/tensorflow/cc/ops/while_loop.h
new file mode 100644
index 0000000000..253d5d8935
--- /dev/null
+++ b/tensorflow/cc/ops/while_loop.h
@@ -0,0 +1,64 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CC_OPS_WHILE_LOOP_H_
+#define THIRD_PARTY_TENSORFLOW_CC_OPS_WHILE_LOOP_H_
+
+#include "tensorflow/cc/framework/ops.h"
+#include "tensorflow/cc/framework/scope.h"
+
+namespace tensorflow {
+namespace ops {
+
+// Function that takes cond graph inputs and returns cond graph boolean output.
+// 'output' need not be set if an error is returned.
+typedef std::function& inputs,
+                             Output* output)>
+    CondGraphBuilderFn;
+
+// Function that takes body graph inputs and returns body graph outputs.
+// 'outputs' need not be populated if an error is returned.
+typedef std::function& inputs,
+                             std::vector* outputs)>
+    BodyGraphBuilderFn;
+
+// Constructs a while loop.
+//
+// Arguments:
+// * scope: used to construct the while loop.
+// * inputs: the initial values of the loop variables. Must be non-empty.
+// * cond: a function that builds the condition graph of the loop. Takes the
+//     current loop variables as inputs and returns a scalar boolean Output
+//     indicating whether the loop should continue.
+// * body: a function that builds the body graph of the loop. Takes the current
+//     loop variables as inputs and returns the updated loop variables.
+// * frame_name: the frame name to use for this while loop. This should be a
+//     unique name. This will be used as a prefix for created operations.
+// * outputs: output param that returns final loop variable outputs in non-error
+//     case. Must be non-null and empty.
+//
+// Returns an error if the while loop could not be fully constructed.
+//
+// TODO(skyewm): clean up partially-constructed loop in error case
+// TODO(skyewm): create public interface to this method
+Status BuildWhileLoop(const Scope& scope, const std::vector& inputs,
+                      const CondGraphBuilderFn& cond,
+                      const BodyGraphBuilderFn& body, const string& frame_name,
+                      OutputList* outputs);
+
+}  // namespace ops
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CC_OPS_WHILE_LOOP_H_
diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake
index 24291a94b3..87d946c346 100644
--- a/tensorflow/contrib/cmake/tf_c.cmake
+++ b/tensorflow/contrib/cmake/tf_c.cmake
@@ -29,10 +29,19 @@ set(tf_c_srcs
 )
 
 add_library(tf_c OBJECT ${tf_c_srcs})
-add_dependencies(tf_c tf_cc_framework tf_core_lib tf_protos_cc)
+add_dependencies(
+  tf_c
+  tf_cc_framework
+  tf_cc_while_loop
+  tf_core_lib
+  tf_protos_cc)
 
 add_library(tf_c_python_api OBJECT
   "${tensorflow_source_dir}/tensorflow/c/python_api.cc"
   "${tensorflow_source_dir}/tensorflow/c/python_api.h"
 )
-add_dependencies(tf_c_python_api tf_c tf_cc_framework tf_core_lib tf_protos_cc)
+add_dependencies(
+  tf_c_python_api
+  tf_c
+  tf_core_lib
+  tf_protos_cc)
diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake
index b53f428461..6632433087 100644
--- a/tensorflow/contrib/cmake/tf_cc_ops.cmake
+++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake
@@ -105,6 +105,16 @@ add_library(tf_cc_ops OBJECT
     "${tensorflow_source_dir}/tensorflow/cc/ops/standard_ops.h"
 )
 
+########################################################
+# tf_cc_while_loop library
+########################################################
+add_library(tf_cc_while_loop OBJECT
+    "${tensorflow_source_dir}/tensorflow/cc/ops/while_loop.h"
+    "${tensorflow_source_dir}/tensorflow/cc/ops/while_loop.cc"
+)
+
+add_dependencies(tf_cc_while_loop tf_core_framework tf_cc_ops)
+
 ########################################################
 # tf_cc library
 ########################################################
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 31d4fe5e7f..4802309937 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -819,6 +819,7 @@ if(WIN32)
         $
         $
         $
+        $
         $
         $
         $
@@ -868,6 +869,7 @@ add_library(pywrap_tensorflow_internal SHARED
     $
     $
     $
+    $
     $
     $
     $
-- 
GitLab


From 7cca41e8b06829141e2ade13f7e35fc49bd25c00 Mon Sep 17 00:00:00 2001
From: Jianwei Xie 
Date: Tue, 29 Aug 2017 08:25:48 -0700
Subject: [PATCH 058/294] Improves the reliability of RunConfig.uid

PiperOrigin-RevId: 166849991
---
 .../python/learn/estimators/run_config.py     |  4 +++-
 .../learn/estimators/run_config_test.py       | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py
index 626c34014b..eae35d59ac 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py
@@ -335,7 +335,9 @@ class RunConfig(ClusterConfig, core_run_config.RunConfig):
     # For class instance without __repr__, some special cares are required.
     # Otherwise, the object address will be used.
     if '_cluster_spec' in ordered_state:
-      ordered_state['_cluster_spec'] = ordered_state['_cluster_spec'].as_dict()
+      ordered_state['_cluster_spec'] = collections.OrderedDict(
+          sorted(ordered_state['_cluster_spec'].as_dict().items(),
+                 key=lambda t: t[0]))
     return ', '.join(
         '%s=%r' % (k, v) for (k, v) in six.iteritems(ordered_state))
 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config_test.py b/tensorflow/contrib/learn/python/learn/estimators/run_config_test.py
index 25ef1705c2..e559612f7b 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/run_config_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/run_config_test.py
@@ -358,6 +358,25 @@ class RunConfigTest(test.TestCase):
       uid_2 = _create_run_config_with_cluster_spec(tf_config_2_str).uid()
       self.assertEqual(uid_1, uid_2)
 
+  def test_uid_for_different_cluster_specs(self):
+    tf_config_1 = {
+        "cluster": {
+            run_config_lib.TaskType.PS: ["host1:1", "host2:2"],
+            run_config_lib.TaskType.WORKER: ["host3:3", "host4:4", "host5:5"]
+        },
+    }
+
+    tf_config_2 = {
+        "cluster": {
+            run_config_lib.TaskType.PS: ["host1:1"],
+            run_config_lib.TaskType.WORKER: ["host3:3", "host4:4", "host5:5"]
+        },
+    }
+
+    uid_1 = _create_run_config_with_cluster_spec(json.dumps(tf_config_1)).uid()
+    uid_2 = _create_run_config_with_cluster_spec(json.dumps(tf_config_2)).uid()
+    self.assertNotEqual(uid_1, uid_2)
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From dfeb146be1256b35453719ad5c70c6d2b6e50369 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 09:23:21 -0700
Subject: [PATCH 059/294] Refactor Avg/MaxPoolShape(), adding
 MakeShapeFromFormat() and preparing for NCHW_VECT_C.

PiperOrigin-RevId: 166856720
---
 tensorflow/core/framework/common_shape_fns.cc | 183 ++++++++++--------
 .../core/framework/common_shape_fns_test.cc   |  16 +-
 2 files changed, 117 insertions(+), 82 deletions(-)

diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index 4c65c86e5e..0e3ea2ddfb 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -226,6 +226,46 @@ Status FusedConvBiasActivationShape(shape_inference::InferenceContext* c) {
   return Status::OK();
 }
 
+Status CheckFormatConstraintsOnShape(const TensorFormat tensor_format,
+                                     const ShapeHandle shape_handle,
+                                     const string& tensor_name,
+                                     shape_inference::InferenceContext* c) {
+  if (tensor_format == FORMAT_NCHW_VECT_C) {
+    // Check that the vect dim has size 4.
+    const int num_dims = c->Rank(shape_handle);
+    DimensionHandle vect_dim = c->Dim(
+        shape_handle, GetTensorInnerFeatureDimIndex(num_dims, tensor_format));
+    DimensionHandle unused_vect_dim;
+    TF_RETURN_IF_ERROR(c->WithValue(vect_dim, 4, &unused_vect_dim));
+  }
+
+  return Status::OK();
+}
+
+// Returns a new shape with the specified dims arranged in the specified
+// format. The returned value is owned by this context.
+// Note: if format = "FORMAT_NCHW_VECT_C" then C represents the outer_depth.
+Status MakeShapeFromFormat(TensorFormat format, DimensionOrConstant N,
+                           const std::vector& spatial,
+                           DimensionOrConstant C, ShapeHandle* out,
+                           shape_inference::InferenceContext* c) {
+  const int num_dims = GetTensorDimsFromSpatialDims(spatial.size(), format);
+  std::vector dims_actual(num_dims);
+  dims_actual[GetTensorBatchDimIndex(num_dims, format)] = c->MakeDim(N);
+  int outer_c_index = GetTensorFeatureDimIndex(num_dims, format);
+  dims_actual[outer_c_index] = c->MakeDim(C);
+  if (format == FORMAT_NCHW_VECT_C) {
+    dims_actual[GetTensorInnerFeatureDimIndex(num_dims, format)] =
+        c->MakeDim(4);
+  }
+  for (int spatial_dim = 0; spatial_dim < spatial.size(); spatial_dim++) {
+    dims_actual[GetTensorSpatialDimIndex(num_dims, format, spatial_dim)] =
+        c->MakeDim(spatial[spatial_dim]);
+  }
+  *out = c->MakeShape(dims_actual);
+  return Status::OK();
+}
+
 Status DimensionsFromShape(ShapeHandle shape, TensorFormat format,
                            DimensionHandle* batch_dim,
                            gtl::MutableArraySlice spatial_dims,
@@ -520,17 +560,25 @@ Status DepthwiseConv2DNativeShape(shape_inference::InferenceContext* c) {
 
 Status AvgPoolShape(shape_inference::InferenceContext* c) {
   ShapeHandle input_shape;
-  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 4, &input_shape));
 
-  string data_format;
-  Status s = c->GetAttr("data_format", &data_format);
+  string data_format_str;
+  TensorFormat data_format;
+  Status s = c->GetAttr("data_format", &data_format_str);
+  if (s.ok()) {
+    FormatFromString(data_format_str, &data_format);
+  } else {
+    data_format = FORMAT_NHWC;
+  }
+
+  TF_RETURN_IF_ERROR(
+      CheckFormatConstraintsOnShape(data_format, input_shape, "input", c));
 
   std::vector strides;
   TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides));
   if (strides.size() != 4) {
     return errors::InvalidArgument(
-        "AvgPool requires the stride attribute to contain 4 values, but "
-        "got: ",
+        "AvgPool requires the stride attribute to contain 4 values, but got: ",
         strides.size());
   }
 
@@ -542,31 +590,20 @@ Status AvgPoolShape(shape_inference::InferenceContext* c) {
         kernel_sizes.size());
   }
 
-  int32 stride_rows, stride_cols;
-  int32 kernel_rows, kernel_cols;
-
-  if (s.ok() && data_format == "NCHW") {
-    // Canonicalize input shape to NHWC so the shape inference code below can
-    // process it.
-    auto dim = [&](char dimension) {
-      return c->Dim(input_shape, GetTensorDimIndex<2>(FORMAT_NCHW, dimension));
-    };
-    input_shape = c->MakeShape({{dim('N'), dim('0'), dim('1'), dim('C')}});
-    stride_rows = strides[2];
-    stride_cols = strides[3];
-    kernel_rows = kernel_sizes[2];
-    kernel_cols = kernel_sizes[3];
-  } else {
-    stride_rows = strides[1];
-    stride_cols = strides[2];
-    kernel_rows = kernel_sizes[1];
-    kernel_cols = kernel_sizes[2];
-  }
-
-  DimensionHandle batch_size_dim = c->Dim(input_shape, 0);
-  DimensionHandle in_rows_dim = c->Dim(input_shape, 1);
-  DimensionHandle in_cols_dim = c->Dim(input_shape, 2);
-  DimensionHandle output_depth_dim = c->Dim(input_shape, 3);
+  int32 stride_rows = GetTensorDim(strides, data_format, 'H');
+  int32 stride_cols = GetTensorDim(strides, data_format, 'W');
+  int32 kernel_rows = GetTensorDim(kernel_sizes, data_format, 'H');
+  int32 kernel_cols = GetTensorDim(kernel_sizes, data_format, 'W');
+
+  constexpr int num_spatial_dims = 2;
+  DimensionHandle batch_size_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'N'));
+  DimensionHandle in_rows_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'H'));
+  DimensionHandle in_cols_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'W'));
+  DimensionHandle depth_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'C'));
 
   Padding padding;
   TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding));
@@ -582,31 +619,34 @@ Status AvgPoolShape(shape_inference::InferenceContext* c) {
       c, in_cols_dim, kernel_cols, stride_cols, padding, &output_cols));
 
   ShapeHandle output_shape;
-  if (data_format == "NCHW") {
-    output_shape = c->MakeShape(
-        {batch_size_dim, output_depth_dim, output_rows, output_cols});
-  } else {
-    output_shape = c->MakeShape(
-        {batch_size_dim, output_rows, output_cols, output_depth_dim});
-  }
-
+  TF_RETURN_IF_ERROR(MakeShapeFromFormat(data_format, batch_size_dim,
+                                         {output_rows, output_cols}, depth_dim,
+                                         &output_shape, c));
   c->set_output(0, output_shape);
   return Status::OK();
 }
 
 Status MaxPoolShape(shape_inference::InferenceContext* c) {
   ShapeHandle input_shape;
-  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 4, &input_shape));
 
-  string data_format;
-  Status s = c->GetAttr("data_format", &data_format);
+  string data_format_str;
+  TensorFormat data_format;
+  Status s = c->GetAttr("data_format", &data_format_str);
+  if (s.ok()) {
+    FormatFromString(data_format_str, &data_format);
+  } else {
+    data_format = FORMAT_NHWC;
+  }
+
+  TF_RETURN_IF_ERROR(
+      CheckFormatConstraintsOnShape(data_format, input_shape, "input", c));
 
   std::vector strides;
   TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides));
   if (strides.size() != 4) {
     return errors::InvalidArgument(
-        "MaxPool requires the stride attribute to contain 4 values, but "
-        "got: ",
+        "MaxPool requires the stride attribute to contain 4 values, but got: ",
         strides.size());
   }
 
@@ -618,35 +658,22 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) {
         kernel_sizes.size());
   }
 
-  int32 stride_rows, stride_cols, stride_depth;
-  int32 kernel_rows, kernel_cols, kernel_depth;
-
-  if (s.ok() && data_format == "NCHW") {
-    // Canonicalize input shape to NHWC so the shape inference code below can
-    // process it.
-    auto dim = [&](char dimension) {
-      return c->Dim(input_shape, GetTensorDimIndex<2>(FORMAT_NCHW, dimension));
-    };
-    input_shape = c->MakeShape({{dim('N'), dim('0'), dim('1'), dim('C')}});
-    stride_depth = strides[1];
-    stride_rows = strides[2];
-    stride_cols = strides[3];
-    kernel_depth = kernel_sizes[1];
-    kernel_rows = kernel_sizes[2];
-    kernel_cols = kernel_sizes[3];
-  } else {
-    stride_rows = strides[1];
-    stride_cols = strides[2];
-    stride_depth = strides[3];
-    kernel_rows = kernel_sizes[1];
-    kernel_cols = kernel_sizes[2];
-    kernel_depth = kernel_sizes[3];
-  }
-
-  DimensionHandle batch_size_dim = c->Dim(input_shape, 0);
-  DimensionHandle in_rows_dim = c->Dim(input_shape, 1);
-  DimensionHandle in_cols_dim = c->Dim(input_shape, 2);
-  DimensionHandle in_depth_dim = c->Dim(input_shape, 3);
+  int32 stride_depth = GetTensorDim(strides, data_format, 'C');
+  int32 stride_rows = GetTensorDim(strides, data_format, 'H');
+  int32 stride_cols = GetTensorDim(strides, data_format, 'W');
+  int32 kernel_depth = GetTensorDim(kernel_sizes, data_format, 'C');
+  int32 kernel_rows = GetTensorDim(kernel_sizes, data_format, 'H');
+  int32 kernel_cols = GetTensorDim(kernel_sizes, data_format, 'W');
+
+  constexpr int num_spatial_dims = 2;
+  DimensionHandle batch_size_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'N'));
+  DimensionHandle in_rows_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'H'));
+  DimensionHandle in_cols_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'W'));
+  DimensionHandle in_depth_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'C'));
 
   Padding padding;
   TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding));
@@ -660,15 +687,9 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) {
   TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims(
       c, in_depth_dim, kernel_depth, stride_depth, padding, &output_depth));
 
-  output_shape =
-      c->MakeShape({batch_size_dim, output_rows, output_cols, output_depth});
-  if (data_format == "NCHW") {
-    // Convert output shape back to expected NCHW data format.
-    auto dim = [&](char dimension) {
-      return c->Dim(output_shape, GetTensorDimIndex<2>(FORMAT_NHWC, dimension));
-    };
-    output_shape = c->MakeShape({{dim('N'), dim('C'), dim('0'), dim('1')}});
-  }
+  TF_RETURN_IF_ERROR(MakeShapeFromFormat(data_format, batch_size_dim,
+                                         {output_rows, output_cols},
+                                         output_depth, &output_shape, c));
 
   c->set_output(0, output_shape);
   return Status::OK();
diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc
index 416478f854..14f6c1bb45 100644
--- a/tensorflow/core/framework/common_shape_fns_test.cc
+++ b/tensorflow/core/framework/common_shape_fns_test.cc
@@ -696,8 +696,15 @@ TEST(CommonShapeFnsTest, AvgPool2DShapeTest) {
   set_op({{1, 1, 1, 2}}, {1, 1, 2, 1}, "VALID", "NCHW");
   INFER_OK(op, "[1,1,4,4]", "[d0_0,d0_1,3,2]");
 
+  // 5x7 input, 2x2 ksize, 1x1 stride, NCHW_VECT_C test
+  set_op({{1, 1, 1, 1}}, {1, 1, 2, 2}, "VALID", "NCHW_VECT_C");
+  INFER_OK(op, "[2,3,5,7,4]", "[d0_0,d0_1,4,6,4]");
+  INFER_OK(op, "[5,7,?,?,4]", "[d0_0,d0_1,?,?,4]");
+  INFER_OK(op, "[?,?,?,?,4]", "[d0_0,d0_1,?,?,4]");
+  INFER_ERROR("Dimension must be 4 but is 3", op, "[2,5,7,11,3]");
+
   // Invalid rank for input
-  INFER_ERROR("must be rank 4", op, "[4,4]");
+  INFER_ERROR("must be at least rank 4", op, "[4,4]");
 }
 
 TEST(CommonShapeFnsTest, MaxPool2DShapeTest) {
@@ -725,6 +732,13 @@ TEST(CommonShapeFnsTest, MaxPool2DShapeTest) {
   // depth 3 stride, 1x1x1 filter, NCHW
   set_op({1, 3, 1, 1}, {1, 1, 1, 1}, "VALID", "NCHW");
   INFER_OK(op, "[1,7,5,5]", "[d0_0,3,5,5]");
+
+  // 5x7 input, 2x2 ksize, 1x1 stride, NCHW_VECT_C tests
+  set_op({{1, 1, 1, 1}}, {1, 1, 2, 2}, "SAME", "NCHW_VECT_C");
+  INFER_OK(op, "[2,3,5,7,4]", "[d0_0,d0_1,d0_2,d0_3,4]");
+  INFER_OK(op, "[5,7,?,?,4]", "[d0_0,d0_1,d0_2,d0_3,4]");
+  INFER_OK(op, "[?,?,?,?,4]", "[d0_0,d0_1,d0_2,d0_3,4]");
+  INFER_ERROR("Dimension must be 4 but is 8", op, "[2,3,5,7,8]");
 }
 
 TEST(CommonShapeFnsTest, Pool3DShapeTest) {
-- 
GitLab


From 1ccd885712fc1779f55099238f795e9278b235b8 Mon Sep 17 00:00:00 2001
From: Amit Patankar 
Date: Tue, 29 Aug 2017 10:01:07 -0700
Subject: [PATCH 060/294] Updating docs to show support for Python 3.6 in
 Windows. (#12687)

* Updating docs to show support for Python 3.6 in Windows.

* Nit: Fix typo.
---
 tensorflow/docs_src/install/install_windows.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md
index be6a490ff9..3025c9971a 100644
--- a/tensorflow/docs_src/install/install_windows.md
+++ b/tensorflow/docs_src/install/install_windows.md
@@ -71,12 +71,14 @@ Use that package at your own risk.
 
 ## Installing with native pip
 
-If the following version of Python is not installed on your machine,
+If one of the following versions of Python is not installed on your machine,
 install it now:
 
   * [Python 3.5.x 64-bit from python.org](https://www.python.org/downloads/release/python-352/)
+  * [Python 3.6.x 64-bit from python.org](https://www.python.org/downloads/release/python-362/)
 
-Note that Python 3.5.x comes with the pip3 package manager, which is the
+-TensorFlow supports Python 3.5.x and 3.6.x on Windows.
+Note that Python 3 comes with the pip3 package manager, which is the
 program you'll use to install TensorFlow.
 
 To install TensorFlow, start a terminal. Then issue the appropriate
-- 
GitLab


From f8caee4ac4d1212483063f25aae28e6c135e137c Mon Sep 17 00:00:00 2001
From: Yun Peng 
Date: Tue, 29 Aug 2017 19:05:05 +0200
Subject: [PATCH 061/294] Fix TensorFlow Windows build (#12603)

---
 configure.py                  | 10 +++++-----
 tensorflow/python/eager/BUILD |  5 +++++
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/configure.py b/configure.py
index 1a0f71ed94..186fdc9ddc 100644
--- a/configure.py
+++ b/configure.py
@@ -143,7 +143,7 @@ def run_shell(cmd, allow_non_zero=False):
 
 def cygpath(path):
   """Convert path from posix to windows."""
-  return run_shell(['cygpath', '-m', path])
+  return os.path.abspath(path).replace('\\', '/')
 
 
 def get_python_path(environ_cp, python_bin_path):
@@ -196,7 +196,7 @@ def setup_python(environ_cp, bazel_version):
     environ_cp['PYTHON_BIN_PATH'] = ''
 
   # Convert python path to Windows style before checking lib and version
-  if is_cygwin():
+  if is_windows() or is_cygwin():
     python_bin_path = cygpath(python_bin_path)
 
   # Get PYTHON_LIB_PATH
@@ -219,7 +219,7 @@ def setup_python(environ_cp, bazel_version):
   python_major_version = get_python_major_version(python_bin_path)
 
   # Convert python path to Windows style before writing into bazel.rc
-  if is_cygwin():
+  if is_windows() or is_cygwin():
     python_lib_path = cygpath(python_lib_path)
 
   # Set-up env variables used by python_configure.bzl
@@ -600,7 +600,7 @@ def set_tf_cuda_version(environ_cp):
 
     # Find out where the CUDA toolkit is installed
     default_cuda_path = _DEFAULT_CUDA_PATH
-    if is_cygwin():
+    if is_windows() or is_cygwin():
       default_cuda_path = cygpath(
           environ_cp.get('CUDA_PATH', _DEFAULT_CUDA_PATH_WIN))
     elif is_linux():
@@ -660,7 +660,7 @@ def set_tf_cunn_version(environ_cp):
     # unusable. Going through one more level of expansion to handle that.
     cudnn_install_path = os.path.realpath(
         os.path.expanduser(cudnn_install_path))
-    if is_cygwin():
+    if is_windows() or is_cygwin():
       cudnn_install_path = cygpath(cudnn_install_path)
 
     if is_windows():
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index ecb0ca9dfd..dd5d9a729f 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -121,6 +121,7 @@ cuda_py_test(
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:training",
     ],
+    tags = ["no_windows"],
 )
 
 cuda_py_test(
@@ -152,6 +153,7 @@ cuda_py_test(
         "//tensorflow/python:clip_ops",
         "//tensorflow/python:math_ops",
     ],
+    tags = ["no_windows"],
 )
 
 py_library(
@@ -354,6 +356,7 @@ py_test(
     name = "benchmarks_test",
     srcs = ["benchmarks_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_windows"],
     deps = [
         ":backprop",
         ":context",
@@ -371,6 +374,7 @@ py_test(
     name = "tape_test",
     srcs = ["tape_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_windows"],
     deps = [
         ":backprop",
         ":context",
@@ -391,6 +395,7 @@ py_test(
     name = "tensor_node_test",
     srcs = ["tensor_node_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_windows"],
     deps = [
         ":tensor",
         ":tensor_node",
-- 
GitLab


From 5234d453855815957c533921db4f4d5d7c328b37 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 10:27:21 -0700
Subject: [PATCH 062/294] Using -fomit-frame-pointer on ARM to make sure
 Assembly has enough registers.

PiperOrigin-RevId: 166866143
---
 tensorflow/contrib/android/cmake/CMakeLists.txt | 2 +-
 tensorflow/tensorflow.bzl                       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt
index f61e9560ef..11e2128d72 100644
--- a/tensorflow/contrib/android/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/android/cmake/CMakeLists.txt
@@ -36,7 +36,7 @@ set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION
 # Change to compile flags should be replicated into bazel build file
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \
                      -std=c++11 -fno-rtti -fno-exceptions \
-                     -O2 -Wno-narrowing \
+                     -O2 -Wno-narrowing -fomit-frame-pointer \
                      -mfpu=neon -mfloat-abi=softfp -fPIE \
                      -ftemplate-depth=900 \
                      -DGOOGLE_PROTOBUF_NO_RTTI \
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index f1e153e4c5..f0301937fb 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -167,7 +167,7 @@ def tf_copts():
       "-fno-exceptions",
       "-ftemplate-depth=900",
   ]) + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1", "-fopenmp",]) + if_android_arm(
-      ["-mfpu=neon"]) + if_linux_x86_64(["-msse3"]) + select({
+      ["-mfpu=neon", "-fomit-frame-pointer"]) + if_linux_x86_64(["-msse3"]) + select({
           clean_dep("//tensorflow:android"): [
               "-std=c++11",
               "-DTF_LEAN_BINARY",
-- 
GitLab


From e87b2be0e68412e6a72e5b7184968e1e0b1f9178 Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Tue, 29 Aug 2017 11:12:52 -0700
Subject: [PATCH 063/294] Does not silently turn None into NaNs when
 aggregating gradients.

PiperOrigin-RevId: 166873653
---
 tensorflow/python/eager/backprop_test.py | 5 +++++
 tensorflow/python/eager/tensor_node.py   | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 37007378a0..863a43f859 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -23,6 +23,7 @@ from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import tape
 from tensorflow.python.eager import tensor
+from tensorflow.python.eager import tensor_node
 from tensorflow.python.eager import test
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_shape
@@ -74,6 +75,10 @@ class BackpropTest(test.TestCase):
 
       self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
 
+  def testTensoVspaceNoneMutAdd(self):
+    t = tensor.Tensor(1.0)
+    self.assertEqual(tensor_node.TensorVSpace(t).mut_add(t, None).numpy(), 1.0)
+
   def testImplicitGradWithResourceVariable(self):
     x = resource_variable_ops.ResourceVariable(
         initial_value=tensor.Tensor(1.0), name='x')
diff --git a/tensorflow/python/eager/tensor_node.py b/tensorflow/python/eager/tensor_node.py
index ae5afaa970..331bf7eef8 100644
--- a/tensorflow/python/eager/tensor_node.py
+++ b/tensorflow/python/eager/tensor_node.py
@@ -300,6 +300,10 @@ class TensorVSpace(ag_core.VSpace):
       x = _indexed_slices_to_tensor(x)
     if isinstance(y, ops.IndexedSlices):
       y = _indexed_slices_to_tensor(y)
+    if x is None:
+      return y
+    if y is None:
+      return x
     return math_ops.add(x, y)
 
 
-- 
GitLab


From ecdadc84bd5b2eb09a76979c9632d66f7fd08d2c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 11:13:22 -0700
Subject: [PATCH 064/294] Add a `corpus_size` argument to
 tf.contrib.text.skip_gram_sample_with_text_vocab op. This argument mirrors
 `corpus_size` argument used by tf.contrib.text.skip_gram_sample op.

It allows vocab subsampling to work properly when vocabulary files have been preprocessed to eliminate all infrequent tokens (where frequency < vocab_min_count) to save the memory for those unused tokens. This preprocessing will help to train skip-gram models on a large vocabulary where memory usage of the internal lookup table could become a performance bottleneck.

If `corpus_size` is needed but not supplied, then it will be calculated from `vocab_freq_file`, which is the same as the existing implementation.

PiperOrigin-RevId: 166873744
---
 .../contrib/text/python/ops/skip_gram_ops.py  | 25 ++++++++++-
 .../text/python/ops/skip_gram_ops_test.py     | 41 +++++++++++++++++--
 2 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/text/python/ops/skip_gram_ops.py b/tensorflow/contrib/text/python/ops/skip_gram_ops.py
index 410ee517e0..7ed45031a3 100644
--- a/tensorflow/contrib/text/python/ops/skip_gram_ops.py
+++ b/tensorflow/contrib/text/python/ops/skip_gram_ops.py
@@ -216,6 +216,7 @@ def skip_gram_sample_with_text_vocab(input_tensor,
                                      vocab_delimiter=",",
                                      vocab_min_count=0,
                                      vocab_subsampling=None,
+                                     corpus_size=None,
                                      min_skips=1,
                                      max_skips=5,
                                      start=0,
@@ -267,6 +268,18 @@ def skip_gram_sample_with_text_vocab(input_tensor,
       frequently will be randomly down-sampled. Reasonable starting values may
       be around 1e-3 or 1e-5. See Eq. 5 in http://arxiv.org/abs/1310.4546 for
       more details.
+    corpus_size: (Optional) `int`, `float`, or scalar `Tensor` specifying the
+      total number of tokens in the corpus (e.g., sum of all the frequency
+      counts of `vocab_freq_file`). Used with `vocab_subsampling` for
+      down-sampling frequently occurring tokens. If this is specified,
+      `vocab_freq_file` and `vocab_subsampling` must also be specified.
+      If `corpus_size` is needed but not supplied, then it will be calculated
+      from `vocab_freq_file`. You might want to supply your own value if you
+      have already eliminated infrequent tokens from your vocabulary files
+      (where frequency < vocab_min_count) to save memory in the internal token
+      lookup table. Otherwise, the unused tokens' variables will waste memory.
+      The user-supplied `corpus_size` value must be greater than or equal to the
+      sum of all the frequency counts of `vocab_freq_file`.
     min_skips: `int` or scalar `Tensor` specifying the minimum window size to
       randomly use for each token. Must be >= 0 and <= `max_skips`. If
       `min_skips` and `max_skips` are both 0, the only label outputted will be
@@ -316,7 +329,7 @@ def skip_gram_sample_with_text_vocab(input_tensor,
   # Iterates through the vocab file and calculates the number of vocab terms as
   # well as the total corpus size (by summing the frequency counts of all the
   # vocab terms).
-  corpus_size = 0.0
+  calculated_corpus_size = 0.0
   vocab_size = 0
   with gfile.GFile(vocab_freq_file, mode="r") as f:
     reader = csv.reader(f, delimiter=vocab_delimiter)
@@ -334,7 +347,15 @@ def skip_gram_sample_with_text_vocab(input_tensor,
             format(freq, row))
       # Note: tokens whose frequencies are below vocab_min_count will still
       # contribute to the total corpus size used for vocab subsampling.
-      corpus_size += freq
+      calculated_corpus_size += freq
+
+  if not corpus_size:
+    corpus_size = calculated_corpus_size
+  elif calculated_corpus_size - corpus_size > 1e-6:
+    raise ValueError(
+        "`corpus_size`={} must be greater than or equal to the sum of all the "
+        "frequency counts ({}) of `vocab_freq_file` ({}).".format(
+            corpus_size, calculated_corpus_size, vocab_freq_file))
 
   vocab_freq_table = lookup.HashTable(
       lookup.TextFileInitializer(
diff --git a/tensorflow/contrib/text/python/ops/skip_gram_ops_test.py b/tensorflow/contrib/text/python/ops/skip_gram_ops_test.py
index d989942f73..84e36146d5 100644
--- a/tensorflow/contrib/text/python/ops/skip_gram_ops_test.py
+++ b/tensorflow/contrib/text/python/ops/skip_gram_ops_test.py
@@ -470,7 +470,7 @@ class SkipGramOpsTest(test.TestCase):
       self.assertAllEqual(expected_labels, labels.eval())
 
   def _text_vocab_subsample_vocab_helper(self, vocab_freq_file, vocab_min_count,
-                                         vocab_freq_dtype):
+                                         vocab_freq_dtype, corpus_size=None):
     # The outputs are non-deterministic, so set random seed to help ensure that
     # the outputs remain constant for testing.
     random_seed.set_random_seed(42)
@@ -499,6 +499,7 @@ class SkipGramOpsTest(test.TestCase):
         vocab_freq_dtype=vocab_freq_dtype,
         vocab_min_count=vocab_min_count,
         vocab_subsampling=0.05,
+        corpus_size=corpus_size,
         min_skips=1,
         max_skips=1,
         seed=123)
@@ -523,10 +524,27 @@ class SkipGramOpsTest(test.TestCase):
     # the: 30
     # to: 20
     # universe: 2
+    #
+    # corpus_size for the above vocab is 40+8+30+20+2 = 100.
+    text_vocab_freq_file = self._make_text_vocab_freq_file()
     self._text_vocab_subsample_vocab_helper(
-        vocab_freq_file=self._make_text_vocab_freq_file(),
+        vocab_freq_file=text_vocab_freq_file,
         vocab_min_count=3,
         vocab_freq_dtype=dtypes.int64)
+    self._text_vocab_subsample_vocab_helper(
+        vocab_freq_file=text_vocab_freq_file,
+        vocab_min_count=3,
+        vocab_freq_dtype=dtypes.int64,
+        corpus_size=100)
+
+    # The user-supplied corpus_size should not be less than the sum of all
+    # the frequency counts of vocab_freq_file, which is 100.
+    with self.assertRaises(ValueError):
+      self._text_vocab_subsample_vocab_helper(
+          vocab_freq_file=text_vocab_freq_file,
+          vocab_min_count=3,
+          vocab_freq_dtype=dtypes.int64,
+          corpus_size=99)
 
   def test_skip_gram_sample_with_text_vocab_subsample_vocab_float(self):
     """Tests skip-gram sampling with text vocab and subsampling with floats."""
@@ -536,10 +554,27 @@ class SkipGramOpsTest(test.TestCase):
     # the: 0.3
     # to: 0.2
     # universe: 0.02
+    #
+    # corpus_size for the above vocab is 0.4+0.08+0.3+0.2+0.02 = 1.
+    text_vocab_float_file = self._make_text_vocab_float_file()
     self._text_vocab_subsample_vocab_helper(
-        vocab_freq_file=self._make_text_vocab_float_file(),
+        vocab_freq_file=text_vocab_float_file,
         vocab_min_count=0.03,
         vocab_freq_dtype=dtypes.float32)
+    self._text_vocab_subsample_vocab_helper(
+        vocab_freq_file=text_vocab_float_file,
+        vocab_min_count=0.03,
+        vocab_freq_dtype=dtypes.float32,
+        corpus_size=1.0)
+
+    # The user-supplied corpus_size should not be less than the sum of all
+    # the frequency counts of vocab_freq_file, which is 1.
+    with self.assertRaises(ValueError):
+      self._text_vocab_subsample_vocab_helper(
+          vocab_freq_file=text_vocab_float_file,
+          vocab_min_count=0.03,
+          vocab_freq_dtype=dtypes.float32,
+          corpus_size=0.99)
 
   def test_skip_gram_sample_with_text_vocab_errors(self):
     """Tests various errors raised by skip_gram_sample_with_text_vocab()."""
-- 
GitLab


From d236d19f7753ae23f91d794701efa70ace1629da Mon Sep 17 00:00:00 2001
From: Vladimir Moskva 
Date: Tue, 29 Aug 2017 20:24:10 +0200
Subject: [PATCH 065/294] Update rules_closure to 0.4.2 (#12685)

---
 WORKSPACE | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index 5e9b991fcc..a0fe67bf31 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -2,11 +2,11 @@ workspace(name = "org_tensorflow")
 
 http_archive(
     name = "io_bazel_rules_closure",
-    sha256 = "bc41b80486413aaa551860fc37471dbc0666e1dbb5236fb6177cb83b0c105846",
-    strip_prefix = "rules_closure-dec425a4ff3faf09a56c85d082e4eed05d8ce38f",
+    sha256 = "25f5399f18d8bf9ce435f85c6bbf671ec4820bc4396b3022cc5dc4bc66303609",
+    strip_prefix = "rules_closure-0.4.2",
     urls = [
-        "http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/dec425a4ff3faf09a56c85d082e4eed05d8ce38f.tar.gz",  # 2017-06-02
-        "https://github.com/bazelbuild/rules_closure/archive/dec425a4ff3faf09a56c85d082e4eed05d8ce38f.tar.gz",
+        "http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/0.4.2.tar.gz",  # 2017-08-29
+        "https://github.com/bazelbuild/rules_closure/archive/0.4.2.tar.gz",
     ],
 )
 
-- 
GitLab


From a7aa47fd9f23e91ab84eb0f75ff6a8d5b6b95930 Mon Sep 17 00:00:00 2001
From: Shanqing Cai 
Date: Tue, 29 Aug 2017 11:13:44 -0700
Subject: [PATCH 066/294] Add tfdbg support to slim.learning.train

Fixes: #12385
PiperOrigin-RevId: 166873806
---
 tensorflow/contrib/slim/BUILD                 |  2 +
 .../contrib/slim/python/slim/learning.py      |  9 ++++
 .../contrib/slim/python/slim/learning_test.py | 41 +++++++++++++++++++
 .../docs_src/programmers_guide/debugger.md    | 19 +++++++++
 4 files changed, 71 insertions(+)

diff --git a/tensorflow/contrib/slim/BUILD b/tensorflow/contrib/slim/BUILD
index a76e037c5f..8f920c9b03 100644
--- a/tensorflow/contrib/slim/BUILD
+++ b/tensorflow/contrib/slim/BUILD
@@ -88,6 +88,8 @@ py_test(
         "//tensorflow/python:summary",
         "//tensorflow/python:training",
         "//tensorflow/python:variables",
+        "//tensorflow/python/debug:debug_data",
+        "//tensorflow/python/debug:dumping_wrapper",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py
index c7614fd426..5ee014a1f1 100644
--- a/tensorflow/contrib/slim/python/slim/learning.py
+++ b/tensorflow/contrib/slim/python/slim/learning.py
@@ -551,6 +551,7 @@ def train(train_op,
           save_interval_secs=600,
           sync_optimizer=None,
           session_config=None,
+          session_wrapper=None,
           trace_every_n_steps=None):
   """Runs a training loop using a TensorFlow supervisor.
 
@@ -607,6 +608,10 @@ def train(train_op,
       If left as `None`, gradient updates will be asynchronous.
     session_config: An instance of `tf.ConfigProto` that will be used to
       configure the `Session`. If left as `None`, the default will be used.
+    session_wrapper: A function that takes a `tf.Session` object as the only
+      argument and returns a wrapped session object that has the same methods
+      that the original object has, or `None`. Iff not `None`, the wrapped
+      object will be used for training.
     trace_every_n_steps: produce and save a `Timeline` in Chrome trace format
       and add it to the summaries every `trace_every_n_steps`. If None, no trace
       information will be produced or saved.
@@ -736,6 +741,10 @@ def train(train_op,
       with sv.managed_session(
           master, start_standard_services=False, config=session_config) as sess:
         logging.info('Starting Session.')
+        if session_wrapper is not None:
+          logging.info(
+              'Wrapping session with wrapper function: %s', session_wrapper)
+          sess = session_wrapper(sess)
         if is_chief:
           if logdir:
             sv.start_standard_services(sess)
diff --git a/tensorflow/contrib/slim/python/slim/learning_test.py b/tensorflow/contrib/slim/python/slim/learning_test.py
index 3ee2434c02..4e816f9b11 100644
--- a/tensorflow/contrib/slim/python/slim/learning_test.py
+++ b/tensorflow/contrib/slim/python/slim/learning_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import glob
 import os
 import tempfile
 
@@ -30,6 +31,8 @@ from tensorflow.contrib.losses.python.losses import loss_ops
 from tensorflow.contrib.slim.python.slim import learning
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
+from tensorflow.python.debug.lib import debug_data
+from tensorflow.python.debug.wrappers import dumping_wrapper as dumping_wrapper_lib
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -43,6 +46,7 @@ from tensorflow.python.training import gradient_descent
 from tensorflow.python.training import input as input_lib
 from tensorflow.python.training import saver as saver_lib
 
+
 class ClipGradientNormsTest(test.TestCase):
 
   def clip_values(self, arr):
@@ -489,6 +493,43 @@ class TrainTest(test.TestCase):
     self.assertIsNotNone(loss)
     self.assertLess(loss, .015)
 
+  def testTrainWithSessionWrapper(self):
+    """Test that slim.learning.train can take `session_wrapper` args.
+
+    One of the applications of `session_wrapper` is the wrappers of TensorFlow
+    Debugger (tfdbg), which intercept methods calls to `tf.Session` (e.g., run)
+    to achieve debugging. `DumpingDebugWrapperSession` is used here for testing
+    purpose.
+    """
+    dump_root = tempfile.mkdtemp()
+    def dumping_wrapper(sess):  # pylint: disable=invalid-name
+      return dumping_wrapper_lib.DumpingDebugWrapperSession(sess, dump_root)
+
+    with ops.Graph().as_default():
+      random_seed.set_random_seed(0)
+      tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
+      tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)
+
+      tf_predictions = LogisticClassifier(tf_inputs)
+      loss_ops.log_loss(tf_predictions, tf_labels)
+      total_loss = loss_ops.get_total_loss()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
+
+      train_op = learning.create_train_op(total_loss, optimizer)
+
+      loss = learning.train(
+          train_op,
+          None,
+          number_of_steps=1,
+          session_wrapper=dumping_wrapper)
+    self.assertIsNotNone(loss)
+
+    run_root = glob.glob(os.path.join(dump_root, 'run_*'))[-1]
+    dump = debug_data.DebugDumpDir(run_root)
+    self.assertAllEqual(
+        0, dump.get_tensors('global_step', 0, 'DebugIdentity')[0])
+
   def testTrainWithTrace(self):
     logdir = os.path.join(
         tempfile.mkdtemp(prefix=self.get_temp_dir()), 'tmp_logs')
diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md
index 3f31feb706..f40b1e5eb3 100644
--- a/tensorflow/docs_src/programmers_guide/debugger.md
+++ b/tensorflow/docs_src/programmers_guide/debugger.md
@@ -499,6 +499,25 @@ keras_backend.set_session(tf_debug.LocalCLIDebugWrapperSession(tf.Session()))
 model.fit(...)  # This will break into the TFDBG CLI.
 ```
 
+## Debugging tf-slim with TFDBG
+
+TFDBG currently supports only training with
+[tf-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim).
+To debug the training process, provide `LocalCLIDebugWrapperSession` to the
+`session_wrapper` argument of `slim.learning.train()`. For example:
+
+``` python
+import tensorflow as tf
+from tensorflow.python import debug as tf_debug
+
+# ... Code that creates the graph and the train_op ...
+tf.contrib.slim.learning_train(
+    train_op,
+    logdir,
+    number_of_steps=10,
+    session_wrapper=tf_debug.LocalCLIDebugWrapperSession)
+```
+
 ## Offline Debugging of Remotely-Running Sessions
 
 Often, your model is running on a remote machine or a process that you don't
-- 
GitLab


From ccc81808f6f5e2b1f593ea668ec1d3d89710da8c Mon Sep 17 00:00:00 2001
From: Benoit Steiner 
Date: Tue, 29 Aug 2017 11:31:51 -0700
Subject: [PATCH 067/294] Annotate the performance database with expected
 distribution of values.

PiperOrigin-RevId: 166876854
---
 .../grappler/costs/op_performance_data.proto     | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tensorflow/core/grappler/costs/op_performance_data.proto b/tensorflow/core/grappler/costs/op_performance_data.proto
index 0d6b337d5a..1a111b71dc 100644
--- a/tensorflow/core/grappler/costs/op_performance_data.proto
+++ b/tensorflow/core/grappler/costs/op_performance_data.proto
@@ -48,6 +48,16 @@ message OpInfo {
   DeviceProperties device = 4;
 }
 
+message NormalDistribution {
+  double mu = 1;
+  double sigma = 2;
+}
+
+message LogNormalDistribution {
+  double mu = 1;
+  double sigma = 2;
+}
+
 // Performance data for tensorflow operations
 message OpPerformance {
   // The op
@@ -75,6 +85,12 @@ message OpPerformance {
   // Percentage of theoretical memory performance.
   double memory_efficiency = 8;
 
+  // Expected execution time, modeled using one of 2 possible distributions.
+  oneof execution_time {
+    NormalDistribution execution_time_normal = 10;
+    LogNormalDistribution execution_time_log_normal = 11;
+  };
+
   // Memory usage data for a tensorflow operation.
   message OpMemory {
     // The output information may have memory usage and output shapes.
-- 
GitLab


From a7b524eb2af6fbc5f7b05e305cd16c9d7f37cf4f Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Tue, 29 Aug 2017 11:43:15 -0700
Subject: [PATCH 068/294] Making tf.pad eager-safe.

PiperOrigin-RevId: 166878740
---
 .../python/kernel_tests/array_ops_test.py     | 14 +++++++++++
 tensorflow/python/ops/array_ops.py            | 25 ++++++++++---------
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 42e63d8b81..392639fa17 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -1020,5 +1020,19 @@ class IdentityTest(test_util.TensorFlowTestCase):
         _test(d, e, "gpu")
 
 
+class PadTest(test_util.TensorFlowTestCase):
+
+  def testEager(self):
+    with context.eager_mode():
+      t = constant_op.constant([[1, 2, 3], [4, 5, 6]])
+      paddings = constant_op.constant([[1, 1,], [2, 2]])
+      padded = array_ops.pad(t, paddings, "CONSTANT")
+      self.assertAllEqual(padded.numpy(),
+                          [[0, 0, 0, 0, 0, 0, 0],
+                           [0, 0, 1, 2, 3, 0, 0],
+                           [0, 0, 4, 5, 6, 0, 0],
+                           [0, 0, 0, 0, 0, 0, 0]])
+
+
 if __name__ == "__main__":
   test_lib.main()
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 39609255b1..2b9306e874 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1726,18 +1726,19 @@ def pad(tensor, paddings, mode="CONSTANT", name=None, constant_values=0):  # pyl
     raise ValueError("Unknown padding mode: %s" % mode)
 
   # Restore shape information where possible.
-  paddings_constant = tensor_util.constant_value(
-      result.op.inputs[1], partial=True)
-  input_shape = result.op.inputs[0].shape
-  if (input_shape.ndims is not None and not result.shape.is_fully_defined() and
-      paddings_constant is not None):
-    new_shape = []
-    for padding, dim in zip(paddings_constant, input_shape.as_list()):
-      if padding is None or dim is None or not all(padding):
-        new_shape.append(None)
-      else:
-        new_shape.append(sum(padding) + dim)
-    result.set_shape(new_shape)
+  if context.in_graph_mode():
+    paddings_constant = tensor_util.constant_value(
+        result.op.inputs[1], partial=True)
+    input_shape = result.op.inputs[0].shape
+    if (input_shape.ndims is not None and not result.shape.is_fully_defined()
+        and paddings_constant is not None):
+      new_shape = []
+      for padding, dim in zip(paddings_constant, input_shape.as_list()):
+        if padding is None or dim is None or not all(padding):
+          new_shape.append(None)
+        else:
+          new_shape.append(sum(padding) + dim)
+      result.set_shape(new_shape)
 
   return result
 
-- 
GitLab


From 4bebf824f56fa1d4fc4bfdc99d9075ad6c1de7ca Mon Sep 17 00:00:00 2001
From: Igor Ganichev 
Date: Tue, 29 Aug 2017 12:37:32 -0700
Subject: [PATCH 069/294] Make adding function/gradient definitions atomic

PiperOrigin-RevId: 166886623
---
 tensorflow/core/framework/function.cc      |  88 +++++++++++++++-
 tensorflow/core/framework/function.h       |  21 ++++
 tensorflow/core/framework/function_test.cc | 117 +++++++++++++++++++++
 3 files changed, 221 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index 0fdc2c820c..c2d3f37ab3 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -908,6 +908,13 @@ const FunctionDef* FunctionLibraryDefinition::Find(const string& name) const {
 }
 
 Status FunctionLibraryDefinition::AddFunctionDef(const FunctionDef& fdef) {
+  bool added;
+  return AddFunctionDefHelper(fdef, &added);
+}
+
+Status FunctionLibraryDefinition::AddFunctionDefHelper(const FunctionDef& fdef,
+                                                       bool* added) {
+  *added = false;
   std::unique_ptr* entry =
       &function_defs_[fdef.signature().name()];
   if (*entry != nullptr) {
@@ -927,10 +934,18 @@ Status FunctionLibraryDefinition::AddFunctionDef(const FunctionDef& fdef) {
         "' because an op with the same name already exists.");
   }
   entry->reset(new FunctionDefAndOpRegistration(fdef));
+  *added = true;
   return Status::OK();
 }
 
 Status FunctionLibraryDefinition::AddGradientDef(const GradientDef& grad) {
+  bool added;
+  return AddGradientDefHelper(grad, &added);
+}
+
+Status FunctionLibraryDefinition::AddGradientDefHelper(const GradientDef& grad,
+                                                       bool* added) {
+  *added = false;
   string* entry = &func_grad_[grad.function_name()];
   if (!entry->empty()) {
     if (*entry != grad.gradient_func()) {
@@ -943,35 +958,98 @@ Status FunctionLibraryDefinition::AddGradientDef(const GradientDef& grad) {
     return Status::OK();
   }
   *entry = grad.gradient_func();
+  *added = true;
   return Status::OK();
 }
 
-// TODO(skyewm): don't modify FunctionLibraryDefinition in case of error
 Status FunctionLibraryDefinition::AddLibrary(
     const FunctionLibraryDefinition& other) {
+  // Remember the funcs and grads that we added successfully so that
+  // we can roll them back on error.
+  std::vector funcs;
+  std::vector funcs_with_grads;
+  Status s;
+  bool added;
   for (auto iter : other.function_defs_) {
-    TF_RETURN_IF_ERROR(AddFunctionDef(iter.second->fdef));
+    s = AddFunctionDefHelper(iter.second->fdef, &added);
+    if (!s.ok()) {
+      Remove(funcs, funcs_with_grads);
+      return s;
+    }
+    if (added) {
+      funcs.push_back(iter.second->fdef.signature().name());
+    }
   }
   for (auto iter : other.func_grad_) {
     GradientDef grad;
     grad.set_function_name(iter.first);
     grad.set_gradient_func(iter.second);
-    TF_RETURN_IF_ERROR(AddGradientDef(grad));
+    s = AddGradientDefHelper(grad, &added);
+    if (!s.ok()) {
+      Remove(funcs, funcs_with_grads);
+      return s;
+    }
+    if (added) {
+      funcs_with_grads.push_back(grad.function_name());
+    }
   }
   return Status::OK();
 }
 
 Status FunctionLibraryDefinition::AddLibrary(
     const FunctionDefLibrary& lib_def) {
+  // Remember the funcs and grads that we added successfully so that
+  // we can roll them back on error.
+  std::vector funcs;
+  std::vector funcs_with_grads;
+  Status s;
+  bool added;
   for (const FunctionDef& fdef : lib_def.function()) {
-    TF_RETURN_IF_ERROR(AddFunctionDef(fdef));
+    s = AddFunctionDefHelper(fdef, &added);
+    if (!s.ok()) {
+      Remove(funcs, funcs_with_grads);
+      return s;
+    }
+    if (added) {
+      funcs.push_back(fdef.signature().name());
+    }
   }
   for (const GradientDef& grad : lib_def.gradient()) {
-    TF_RETURN_IF_ERROR(AddGradientDef(grad));
+    s = AddGradientDefHelper(grad, &added);
+    if (!s.ok()) {
+      Remove(funcs, funcs_with_grads);
+      return s;
+    }
+    if (added) {
+      funcs_with_grads.push_back(grad.function_name());
+    }
   }
   return Status::OK();
 }
 
+void FunctionLibraryDefinition::RemoveFunction(const string& func) {
+  const auto& i = function_defs_.find(func);
+  DCHECK(i != function_defs_.end());
+  function_defs_.erase(i);
+}
+
+void FunctionLibraryDefinition::RemoveGradient(const string& func) {
+  const auto& i = func_grad_.find(func);
+  DCHECK(i != func_grad_.end());
+  func_grad_.erase(i);
+}
+
+void FunctionLibraryDefinition::Remove(
+    const std::vector& funcs,
+    const std::vector& funcs_with_grads) {
+  for (const string& f : funcs) {
+    RemoveFunction(f);
+  }
+  for (const string& f : funcs_with_grads) {
+    RemoveGradient(f);
+  }
+}
+
 string FunctionLibraryDefinition::FindGradient(const string& func) const {
   return gtl::FindWithDefault(func_grad_, func, "");
 }
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index 87bc5520f4..317707644b 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -292,20 +292,24 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
   // 'fdef' already exists in this function library.
   // If 'fdef' is successfully added to the library, it will be accessible
   // from 'LookUp' and included in the proto returned by 'ToProto'.
+  // This operation is atomic.
   Status AddFunctionDef(const FunctionDef& fdef);
 
   // Adds gradient definition 'grad' to this function library.
   // This is a no-op if 'grad' already exists in this function library.
   // If 'grad' is successfully added, it will be accessible via 'FindGradient'
   // and included in the proto returned by 'ToProto'.
+  // This operation is atomic.
   Status AddGradientDef(const GradientDef& grad);
 
   // Adds the functions and gradients in 'other' to this function library.
   // Duplicate functions and gradients are ignored.
+  // This operation is atomic.
   Status AddLibrary(const FunctionLibraryDefinition& other);
 
   // Adds the functions and gradients in 'lib_def' to this function library.
   // Duplicate functions and gradients are ignored.
+  // This operation is atomic.
   Status AddLibrary(const FunctionDefLibrary& lib_def);
 
   // If the gradient function for 'func' is specified explicitly in
@@ -353,6 +357,11 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
     OpRegistrationData op_registration_data;
   };
 
+  // Same as AddFunctionDef/AddGradientDef except these methods set
+  // `added` to true if the `fdef`/`grad` were actually added to this.
+  Status AddFunctionDefHelper(const FunctionDef& fdef, bool* added);
+  Status AddGradientDefHelper(const GradientDef& grad, bool* added);
+
   const OpRegistryInterface* const default_registry_;
   gtl::FlatMap>
       function_defs_;
@@ -361,6 +370,18 @@ class FunctionLibraryDefinition : public OpRegistryInterface {
   // Helper function for GetAttr. Returns the FunctionDef* to get the
   // attr from.
   const FunctionDef* GetAttrImpl(const NodeDef& ndef) const;
+
+  // Remove function `func` from the library. `func` must be in the library.
+  void RemoveFunction(const string& func);
+
+  // Remove gradient of function `func` from the library. `func` must have
+  // a gradient.
+  void RemoveGradient(const string& func);
+
+  // Remove all functions in `funcs` and all gradients of
+  // functions in `funcs_with_grads` from this library.
+  void Remove(const std::vector& funcs,
+              const std::vector& funcs_with_grads);
 };
 
 // Forward declare. Defined in common_runtime/function.h
diff --git a/tensorflow/core/framework/function_test.cc b/tensorflow/core/framework/function_test.cc
index 8e15bf04ab..13955addb5 100644
--- a/tensorflow/core/framework/function_test.cc
+++ b/tensorflow/core/framework/function_test.cc
@@ -1054,6 +1054,123 @@ TEST(FunctionLibraryDefinitionTest, AddLibrary) {
   TF_EXPECT_OK(lib_def.AddLibrary(lib_def));
 }
 
+GradientDef MakeGradDef(const string& f, const string& g) {
+  GradientDef grad;
+  grad.set_function_name(f);
+  grad.set_gradient_func(g);
+  return grad;
+}
+
+TEST(FunctionLibraryDefinitionTest, AddLibrary_Atomic) {
+  // Create lib def containing two functions with equal names
+  FunctionDefLibrary proto;
+  const string x2_name = test::function::XTimesTwo().signature().name();
+  const string x4_name = test::function::XTimesFour().signature().name();
+  *proto.add_function() = test::function::XTimesTwo();
+  FunctionDef fdef = test::function::XTimesFour();
+  fdef.mutable_signature()->set_name(x2_name);
+  *proto.add_function() = fdef;
+  FunctionLibraryDefinition lib_def(OpRegistry::Global(), FunctionDefLibrary());
+
+  // Try adding the two functions to lib_def
+  Status s = lib_def.AddLibrary(proto);
+  EXPECT_EQ(error::Code::INVALID_ARGUMENT, s.code());
+  EXPECT_EQ(
+      "Cannot add function 'XTimesTwo' because a different function with "
+      "the same name already exists.",
+      s.error_message());
+
+  // Verify that none of the functions are added
+  EXPECT_TRUE(lib_def.Find(x2_name) == nullptr);
+
+  // Fix the name in proto but add two gradient names for it
+  proto.mutable_function(1)->mutable_signature()->set_name(x4_name);
+  *proto.add_gradient() = MakeGradDef(x2_name, x4_name);
+  *proto.add_gradient() = MakeGradDef(x2_name, "SecondGradName");
+
+  // Try adding the library and check that nothing was added
+  s = lib_def.AddLibrary(proto);
+  EXPECT_EQ(error::Code::INVALID_ARGUMENT, s.code());
+  EXPECT_EQ(s.error_message(),
+            "Cannot assign gradient function 'SecondGradName' to 'XTimesTwo' "
+            "because it already has gradient function 'XTimesFour'");
+  EXPECT_TRUE(lib_def.Find(x2_name) == nullptr);
+  EXPECT_EQ(0, lib_def.ToProto().function_size());
+  EXPECT_EQ(0, lib_def.ToProto().gradient_size());
+}
+
+TEST(FunctionLibraryDefinitionTest, AddLibraryDefinition_Atomic_FuncConflict) {
+  const string x2_name = test::function::XTimesTwo().signature().name();
+  const string x4_name = test::function::XTimesFour().signature().name();
+  const string wx_name = test::function::WXPlusB().signature().name();
+
+  // Create FunctionLibraryDefinition with
+  // (func = XTimesTwo, grad = XTimesFour)
+  FunctionDefLibrary proto;
+  *proto.add_function() = test::function::XTimesTwo();
+  *proto.add_gradient() = MakeGradDef(x2_name, x4_name);
+  FunctionLibraryDefinition lib_def(OpRegistry::Global(), proto);
+  EXPECT_EQ(1, lib_def.ToProto().function_size());
+  EXPECT_EQ(1, lib_def.ToProto().gradient_size());
+
+  // Create FunctionLibraryDefinition with (func = WXPlusB, grad = XTimesTwo)
+  // and function (name = XTimesTwo, body = XTimeFour)
+  FunctionDefLibrary proto2;
+  *proto2.add_function() = test::function::WXPlusB();
+  *proto2.add_gradient() = MakeGradDef(wx_name, x2_name);
+  *proto2.add_function() = test::function::XTimesFour();
+  proto2.mutable_function(1)->mutable_signature()->set_name(x2_name);
+  FunctionLibraryDefinition lib_def2(OpRegistry::Global(), proto2);
+
+  // Verify that adding lib_def2 will fail because of function conflict
+  // and WXPlusB is not added.
+  Status s = lib_def.AddLibrary(lib_def2);
+  EXPECT_EQ(error::Code::INVALID_ARGUMENT, s.code());
+  EXPECT_EQ(
+      "Cannot add function 'XTimesTwo' because a different function "
+      "with the same name already exists.",
+      s.error_message());
+  EXPECT_TRUE(lib_def.Find(wx_name) == nullptr);
+  EXPECT_EQ(1, lib_def.ToProto().function_size());
+  EXPECT_EQ(1, lib_def.ToProto().gradient_size());
+}
+
+TEST(FunctionLibraryDefinitionTest, AddLibraryDefinition_Atomic_GradConflict) {
+  const string x2_name = test::function::XTimesTwo().signature().name();
+  const string x4_name = test::function::XTimesFour().signature().name();
+  const string wx_name = test::function::WXPlusB().signature().name();
+
+  // Create FunctionLibraryDefinition with
+  // (func = XTimesTwo, grad = XTimesFour)
+  FunctionDefLibrary proto;
+  *proto.add_function() = test::function::XTimesTwo();
+  *proto.add_gradient() = MakeGradDef(x2_name, x4_name);
+  FunctionLibraryDefinition lib_def(OpRegistry::Global(), proto);
+  EXPECT_EQ(1, lib_def.ToProto().function_size());
+  EXPECT_EQ(1, lib_def.ToProto().gradient_size());
+
+  // Create FunctionLibraryDefinition with (func = WXPlusB, grad = XTimesTwo)
+  // and (func = XTimesTwo, grad = WXPlusB)
+  FunctionDefLibrary proto2;
+  *proto2.add_function() = test::function::WXPlusB();
+  *proto2.add_gradient() = MakeGradDef(wx_name, x2_name);
+  *proto2.add_function() = test::function::XTimesTwo();
+  *proto2.add_gradient() = MakeGradDef(x2_name, wx_name);
+  FunctionLibraryDefinition lib_def2(OpRegistry::Global(), proto2);
+
+  // Verify that adding lib_def2 will fail because of gradient conflict
+  // and WXPlusB is not added.
+  Status s = lib_def.AddLibrary(lib_def2);
+  EXPECT_EQ(error::Code::INVALID_ARGUMENT, s.code());
+  EXPECT_EQ(
+      "Cannot assign gradient function 'WXPlusB' to 'XTimesTwo'"
+      " because it already has gradient function 'XTimesFour'",
+      s.error_message());
+  EXPECT_TRUE(lib_def.Find(wx_name) == nullptr);
+  EXPECT_EQ(1, lib_def.ToProto().function_size());
+  EXPECT_EQ(1, lib_def.ToProto().gradient_size());
+}
+
 TEST(FunctionLibraryDefinitionTest, ToProto) {
   FunctionDefLibrary proto1;
   *proto1.add_function() = test::function::XTimesTwo();
-- 
GitLab


From 28ea83ce387f3794e22745112fe9bdedb9e5993b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 12:41:10 -0700
Subject: [PATCH 070/294] [XLA] Add an '--xla_reduce_precision' option to
 TF_XLA_FLAGS.

This also adds testing for the more-complicated parsers in debug_options_flags.cc, and corrects a couple of comment typos in parse_flags_from_env_test.cc and in the top of the BUILD file.

PiperOrigin-RevId: 166887094
---
 tensorflow/compiler/xla/legacy_flags/BUILD    |  31 +++-
 .../xla/legacy_flags/debug_options_flags.cc   |  50 +++---
 .../xla/legacy_flags/debug_options_parsers.h  | 151 ++++++++++++++++++
 .../debug_options_parsers_test.cc             | 106 ++++++++++++
 .../legacy_flags/parse_flags_from_env_test.cc |   4 +-
 5 files changed, 313 insertions(+), 29 deletions(-)
 create mode 100644 tensorflow/compiler/xla/legacy_flags/debug_options_parsers.h
 create mode 100644 tensorflow/compiler/xla/legacy_flags/debug_options_parsers_test.cc

diff --git a/tensorflow/compiler/xla/legacy_flags/BUILD b/tensorflow/compiler/xla/legacy_flags/BUILD
index b47c82f075..7977df81f5 100644
--- a/tensorflow/compiler/xla/legacy_flags/BUILD
+++ b/tensorflow/compiler/xla/legacy_flags/BUILD
@@ -1,11 +1,11 @@
-# Legacy command line flags for the XLA libraries.
+# Legacy command-line flags for the XLA libraries.
 
 # Please do not add more flags to this package.
 
-# The XLA libraries were written in an environment that allowed command - line
+# The XLA libraries were written in an environment that allowed command-line
 # flags to be scattered freely throughout the libraries.  This model, while
-# initially convenient, leads to a proliferation in unused commnd line flags in
-# tests and binaries, and serious problems in servers, where one might wish
+# initially convenient, leads to a proliferation in unused command-line flags
+# in tests and binaries, and serious problems in servers, where one might wish
 # parameters to be different in independent RPC calls to the same routine.
 #
 # Please don't add more flags.  If you're a library author, pass options and
@@ -43,17 +43,38 @@ cc_test(
 
 cc_library(
     name = "debug_options_flags",
-    srcs = ["debug_options_flags.cc"],
+    srcs = [
+        "debug_options_flags.cc",
+        "debug_options_parsers.h",
+    ],
     hdrs = ["debug_options_flags.h"],
     deps =
         [
             ":parse_flags_from_env",
             "//tensorflow/compiler/xla:xla_proto",
+            "//tensorflow/compiler/xla/service:hlo",
             "//tensorflow/core:framework_internal",
             "//tensorflow/core:lib",
         ],
 )
 
+cc_test(
+    name = "debug_options_parsers_test",
+    size = "small",
+    srcs = [
+        "debug_options_parsers.h",
+        "debug_options_parsers_test.cc",
+    ],
+    deps =
+        [
+            "//tensorflow/compiler/xla:xla_proto",
+            "//tensorflow/compiler/xla/service:hlo",
+            "//tensorflow/core:framework_internal",
+            "//tensorflow/core:lib",
+            "//tensorflow/core:test",
+        ],
+)
+
 # -----------------------------------------------------------------------------
 
 filegroup(
diff --git a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
index 34759dbc9b..8892bfbe92 100644
--- a/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/legacy_flags/debug_options_flags.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include   // NOLINT(build/c++11): only using std::call_once, not mutex.
 #include 
+#include "tensorflow/compiler/xla/legacy_flags/debug_options_parsers.h"
 #include "tensorflow/compiler/xla/legacy_flags/parse_flags_from_env.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
@@ -67,7 +68,7 @@ void AllocateFlags() {
     };
   };
 
-  // Returns a lambda that is a custom "sub-parser" for xla_disable_hlo_passes.
+  // Custom "sub-parser" lambda for xla_disable_hlo_passes.
   auto setter_for_xla_disable_hlo_passes = [](string comma_separated_values) {
     std::vector disabled_passes =
         tensorflow::str_util::Split(comma_separated_values, ',');
@@ -77,33 +78,25 @@ void AllocateFlags() {
     return true;
   };
 
-  // Returns a lambda that is a custom "sub-parser" for
-  // xla_backend_extra_options.
+  // Custom "sub-parser" lambda for xla_backend_extra_options.
   auto setter_for_xla_backend_extra_options =
       [](string comma_separated_values) {
-        std::vector extra_options_parts =
-            tensorflow::str_util::Split(comma_separated_values, ',');
         auto* extra_options_map =
             flag_values->mutable_xla_backend_extra_options();
-
-        // The flag contains a comma-separated list of options; some options
-        // have arguments following "=", some don't.
-        for (const auto& part : extra_options_parts) {
-          size_t eq_pos = part.find_first_of('=');
-          if (eq_pos == string::npos) {
-            (*extra_options_map)[part] = "";
-          } else {
-            string value = "";
-            if (eq_pos + 1 < part.size()) {
-              value = part.substr(eq_pos + 1);
-            }
-            (*extra_options_map)[part.substr(0, eq_pos)] = value;
-          }
-        }
-
+        impl::parse_xla_backend_extra_options(extra_options_map,
+                                              comma_separated_values);
         return true;
       };
 
+  // Custom "sub-parser" lambda for xla_reduce_precision.
+  auto setter_for_xla_reduce_precision =
+      [](string reduce_precision_option_value) {
+        HloReducePrecisionOptions* option_proto =
+            flag_values->add_hlo_reduce_precision_options();
+        return impl::parse_xla_reduce_precision_option(
+            option_proto, reduce_precision_option_value);
+      };
+
   flag_objects = new std::vector(
       {tensorflow::Flag(
            "xla_generate_hlo_graph",
@@ -250,7 +243,20 @@ void AllocateFlags() {
                         setter_for_xla_backend_extra_options, "",
                         "Extra options to pass to a backend; "
                         "comma-separated list of 'key=val' strings (=val "
-                        "may be omitted); no whitespace around commas.")});
+                        "may be omitted); no whitespace around commas."),
+       tensorflow::Flag("xla_reduce_precision", setter_for_xla_reduce_precision,
+                        "",
+                        "Directions for adding reduce-precision operations. "
+                        "Format is 'LOCATION=E,M:OPS;NAMES' where LOCATION is "
+                        "the class of locations in which to insert the "
+                        "operations (e.g., 'OP_OUTPUTS'), E and M are the "
+                        "exponent and matissa bit counts respectively, and "
+                        "OPS and NAMES are comma-separated (no spaces) lists "
+                        "of the operation types and names to which to attach "
+                        "the reduce-precision operations.  The NAMES string "
+                        "and its preceding ';' may be omitted.  This option "
+                        "may be repeated to define multiple sets of added "
+                        "reduce-precision operations.")});
   ParseFlagsFromEnv(*flag_objects);
 }
 
diff --git a/tensorflow/compiler/xla/legacy_flags/debug_options_parsers.h b/tensorflow/compiler/xla/legacy_flags/debug_options_parsers.h
new file mode 100644
index 0000000000..0c238e6a5d
--- /dev/null
+++ b/tensorflow/compiler/xla/legacy_flags/debug_options_parsers.h
@@ -0,0 +1,151 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_LEGACY_FLAGS_DEBUG_OPTIONS_PARSERS_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_LEGACY_FLAGS_DEBUG_OPTIONS_PARSERS_H_
+
+#include 
+#include "tensorflow/compiler/xla/service/hlo_opcode.h"
+#include "tensorflow/compiler/xla/xla.pb.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace xla {
+namespace legacy_flags {
+namespace impl {
+
+template 
+void parse_xla_backend_extra_options(T* extra_options_map,
+                                     string comma_separated_values) {
+  std::vector extra_options_parts =
+      tensorflow::str_util::Split(comma_separated_values, ',');
+
+  // The flag contains a comma-separated list of options; some options
+  // have arguments following "=", some don't.
+  for (const auto& part : extra_options_parts) {
+    size_t eq_pos = part.find_first_of('=');
+    if (eq_pos == string::npos) {
+      (*extra_options_map)[part] = "";
+    } else {
+      string value = "";
+      if (eq_pos + 1 < part.size()) {
+        value = part.substr(eq_pos + 1);
+      }
+      (*extra_options_map)[part.substr(0, eq_pos)] = value;
+    }
+  }
+}
+
+// The --xla_reduce_precision option has the format "LOCATION=E,M:OPS;NAME",
+// where LOCATION is an HloReducePrecisionOptions::location, E and M are
+// integers for the exponent and matissa bit counts respectively, and OPS and
+// NAMES are comma-separated of the operation types and names to which to
+// attach the reduce-precision operations.  The OPS values are matches to the
+// strings produced by HloOpcodeString, while the NAME values are arbitrary
+// strings subject to the requirements that they not contain any of "=,:;".
+// The NAME string (with its preceding semicolon) is optional.
+inline bool parse_xla_reduce_precision_option(
+    HloReducePrecisionOptions* options, string option_string) {
+  // Split off "LOCATION" from remainder of string.
+  std::vector eq_split =
+      tensorflow::str_util::Split(option_string, '=');
+  if (eq_split.size() != 2) {
+    return false;
+  }
+  string& location = eq_split[0];
+  if (location == "OP_INPUTS") {
+    options->set_location(HloReducePrecisionOptions::OP_INPUTS);
+  } else if (location == "OP_OUTPUTS") {
+    options->set_location(HloReducePrecisionOptions::OP_OUTPUTS);
+  } else if (location == "UNFUSED_OP_OUTPUTS") {
+    options->set_location(HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS);
+  } else if (location == "FUSION_INPUTS_BY_CONTENT") {
+    options->set_location(HloReducePrecisionOptions::FUSION_INPUTS_BY_CONTENT);
+  } else if (location == "FUSION_OUTPUTS_BY_CONTENT") {
+    options->set_location(HloReducePrecisionOptions::FUSION_OUTPUTS_BY_CONTENT);
+  } else {
+    return false;
+  }
+
+  // Split off "E,M" from remainder of string.
+  std::vector colon_split =
+      tensorflow::str_util::Split(eq_split[1], ':');
+  if (colon_split.size() != 2) {
+    return false;
+  }
+
+  // Split E and M, and parse.
+  std::vector bitsizes;
+  if (!tensorflow::str_util::SplitAndParseAsInts(colon_split[0], ',',
+                                                 &bitsizes) ||
+      bitsizes.size() != 2) {
+    return false;
+  }
+  options->set_exponent_bits(bitsizes[0]);
+  options->set_mantissa_bits(bitsizes[1]);
+
+  // Split off OPS comma-separated list from remainder of string, if the
+  // remainder exists.
+  std::vector semicolon_split =
+      tensorflow::str_util::Split(colon_split[1], ';');
+  if (semicolon_split.size() > 2) {
+    return false;
+  }
+  // The opcode values are either 'all' (meaning all opcodes), or matches to
+  // the strings returned by HloOpcodeString.  An empty string is also
+  // interpreted as 'all', for convenience.  Note that 'all' may not be part
+  // of a comma-separated list; it must stand alone.
+  string& opcode_string = semicolon_split[0];
+  if (opcode_string == "" || opcode_string == "all") {
+    for (int i = 0; i < HloOpcodeCount(); i++) {
+      options->add_opcodes_to_suffix(i);
+    }
+  } else {
+    std::vector opcodes =
+        tensorflow::str_util::Split(opcode_string, ',');
+    for (const string& opcode : opcodes) {
+      bool found = false;
+      for (int i = 0; i < HloOpcodeCount(); i++) {
+        if (opcode == HloOpcodeString(static_cast(i))) {
+          options->add_opcodes_to_suffix(i);
+          found = true;
+          break;
+        }
+      }
+      if (!found) {
+        return false;
+      }
+    }
+  }
+
+  // Process the NAMES string, if it exists.
+  if (semicolon_split.size() == 2) {
+    std::vector opnames =
+        tensorflow::str_util::Split(semicolon_split[1], ',');
+    for (const string& opname : opnames) {
+      if (opname.length() > 0) {
+        options->add_opname_substrings_to_suffix(opname);
+      }
+    }
+  }
+
+  return true;
+}
+
+}  // namespace impl
+}  // namespace legacy_flags
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_LEGACY_FLAGS_DEBUG_OPTIONS_PARSERS_H_
diff --git a/tensorflow/compiler/xla/legacy_flags/debug_options_parsers_test.cc b/tensorflow/compiler/xla/legacy_flags/debug_options_parsers_test.cc
new file mode 100644
index 0000000000..0ed788a967
--- /dev/null
+++ b/tensorflow/compiler/xla/legacy_flags/debug_options_parsers_test.cc
@@ -0,0 +1,106 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Test for parse_flags_from_env.cc
+
+#include "tensorflow/compiler/xla/legacy_flags/debug_options_parsers.h"
+
+#include 
+#include 
+
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace xla {
+namespace legacy_flags {
+
+// Test that the xla_backend_extra_options flag is parsed correctly.
+TEST(DebugOptionsFlags, ParseXlaBackendExtraOptions) {
+  std::unordered_map test_map;
+  string test_string = "aa=bb,cc,dd=,ee=ff=gg";
+  impl::parse_xla_backend_extra_options(&test_map, test_string);
+  EXPECT_EQ(test_map.size(), 4);
+  EXPECT_EQ(test_map.at("aa"), "bb");
+  EXPECT_EQ(test_map.at("cc"), "");
+  EXPECT_EQ(test_map.at("dd"), "");
+  EXPECT_EQ(test_map.at("ee"), "ff=gg");
+}
+
+// Test that the xla_reduce_precision flag is parsed correctly.
+TEST(DebugOptionsFlags, ParseXlaReducePrecisionOptionNoStrings) {
+  HloReducePrecisionOptions proto;
+  string test_string = "OP_OUTPUTS=5,10:add,dot";
+  EXPECT_TRUE(impl::parse_xla_reduce_precision_option(&proto, test_string));
+  EXPECT_EQ(proto.location(), HloReducePrecisionOptions::OP_OUTPUTS);
+  EXPECT_EQ(proto.exponent_bits(), 5);
+  EXPECT_EQ(proto.mantissa_bits(), 10);
+  EXPECT_EQ(proto.opcodes_to_suffix_size(), 2);
+  EXPECT_EQ(static_cast(proto.opcodes_to_suffix(0)),
+            HloOpcode::kAdd);
+  EXPECT_EQ(static_cast(proto.opcodes_to_suffix(1)),
+            HloOpcode::kDot);
+  EXPECT_EQ(proto.opname_substrings_to_suffix_size(), 0);
+}
+
+TEST(DebugOptionsFlags, ParseXlaReducePrecisionOptionNoStringsSemicolon) {
+  HloReducePrecisionOptions proto;
+  string test_string = "OP_OUTPUTS=5,10:add,dot;";
+  EXPECT_TRUE(impl::parse_xla_reduce_precision_option(&proto, test_string));
+  EXPECT_EQ(proto.location(), HloReducePrecisionOptions::OP_OUTPUTS);
+  EXPECT_EQ(proto.exponent_bits(), 5);
+  EXPECT_EQ(proto.mantissa_bits(), 10);
+  EXPECT_EQ(proto.opcodes_to_suffix_size(), 2);
+  EXPECT_EQ(static_cast(proto.opcodes_to_suffix(0)),
+            HloOpcode::kAdd);
+  EXPECT_EQ(static_cast(proto.opcodes_to_suffix(1)),
+            HloOpcode::kDot);
+  EXPECT_EQ(proto.opname_substrings_to_suffix_size(), 0);
+}
+
+TEST(DebugOptionsFlags, ParseXlaReducePrecisionOptionNoOpcodes) {
+  HloReducePrecisionOptions proto;
+  string test_string = "UNFUSED_OP_OUTPUTS=5,10:;foo,bar/baz";
+  EXPECT_TRUE(impl::parse_xla_reduce_precision_option(&proto, test_string));
+  EXPECT_EQ(proto.location(), HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS);
+  EXPECT_EQ(proto.exponent_bits(), 5);
+  EXPECT_EQ(proto.mantissa_bits(), 10);
+  EXPECT_EQ(proto.opcodes_to_suffix_size(), HloOpcodeCount());
+  EXPECT_EQ(proto.opname_substrings_to_suffix_size(), 2);
+  EXPECT_EQ(proto.opname_substrings_to_suffix(0), "foo");
+  EXPECT_EQ(proto.opname_substrings_to_suffix(1), "bar/baz");
+}
+
+TEST(DebugOptionsFlags, ParseXlaReducePrecisionOptionBoth) {
+  HloReducePrecisionOptions proto;
+  string test_string = "UNFUSED_OP_OUTPUTS=5,10:subtract;foo,bar/baz";
+  EXPECT_TRUE(impl::parse_xla_reduce_precision_option(&proto, test_string));
+  EXPECT_EQ(proto.location(), HloReducePrecisionOptions::UNFUSED_OP_OUTPUTS);
+  EXPECT_EQ(proto.exponent_bits(), 5);
+  EXPECT_EQ(proto.mantissa_bits(), 10);
+  EXPECT_EQ(proto.opcodes_to_suffix_size(), 1);
+  EXPECT_EQ(static_cast(proto.opcodes_to_suffix(0)),
+            HloOpcode::kSubtract);
+  EXPECT_EQ(proto.opname_substrings_to_suffix_size(), 2);
+  EXPECT_EQ(proto.opname_substrings_to_suffix(0), "foo");
+  EXPECT_EQ(proto.opname_substrings_to_suffix(1), "bar/baz");
+}
+
+}  // namespace legacy_flags
+}  // namespace xla
+
+int main(int argc, char* argv[]) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc b/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc
index 07bbcd802f..a3b4286f4c 100644
--- a/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc
+++ b/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc
@@ -86,14 +86,14 @@ static const char kTestFlagString[] =
     "--single_quoted='single quoted \\\\ \n \"' "
     "--double_quoted=\"double quoted \\\\ \n '\\\"\" ";
 
-// Test that the environent variable is parserd correctly.
+// Test that the environent variable is parsed correctly.
 TEST(ParseFlagsFromEnv, Basic) {
   // Prepare environment.
   setenv("TF_XLA_FLAGS", kTestFlagString, true /*overwrite*/);
   TestParseFlagsFromEnv("(flags in environment variable)");
 }
 
-// Test that a file named by the environent variable is parserd correctly.
+// Test that a file named by the environent variable is parsed correctly.
 TEST(ParseFlagsFromEnv, File) {
   // environment variables where  tmp dir may be specified.
   static const char* kTempVars[] = {"TEST_TMPDIR", "TMP"};
-- 
GitLab


From 66cc9e5474c38b1fddb4a192748f4a3248364c49 Mon Sep 17 00:00:00 2001
From: David Majnemer 
Date: Tue, 29 Aug 2017 13:20:36 -0700
Subject: [PATCH 071/294] [XLA] Make RunDFSMemoryScheduler more accurate

Skip parameters and constants, they are long lived in ways which are not
relevant to the analysis. Also, remove superfluous bitcasts so as to make our
ordering more accurate.

PiperOrigin-RevId: 166892521
---
 .../xla/service/algebraic_simplifier.cc         | 16 ++++++++++++++++
 .../compiler/xla/service/hlo_scheduling.cc      | 17 ++++++++++++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index d3649a1ed1..2dcba6996c 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -121,6 +121,8 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
   Status HandleAdd(HloInstruction* add, HloInstruction* lhs,
                    HloInstruction* rhs) override;
 
+  Status HandleBitcast(HloInstruction* bitcast) override;
+
   Status HandleBroadcast(HloInstruction* broadcast) override;
 
   Status HandleConcatenate(
@@ -340,6 +342,20 @@ Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add,
   return Status::OK();
 }
 
+Status AlgebraicSimplifierVisitor::HandleBitcast(HloInstruction* bitcast) {
+  // If a bitcast feeds a bitcast, make it a single bitcast.
+  if (bitcast->operand(0)->opcode() == HloOpcode::kBitcast) {
+    return ReplaceWithNewInstruction(
+        bitcast, HloInstruction::CreateUnary(
+                     bitcast->shape(), HloOpcode::kBitcast,
+                     bitcast->mutable_operand(0)->mutable_operand(0)));
+  }
+  // All bitcasts can be eliminated (assuming layout constraints are
+  // satisified).
+  ReplaceInstructionIfSameShape(bitcast, bitcast->mutable_operand(0));
+  return Status::OK();
+}
+
 Status AlgebraicSimplifierVisitor::HandleCopy(HloInstruction* copy) {
   // If a copy feeds a copy, make it a single copy.
   if (copy->operand(0)->opcode() == HloOpcode::kCopy) {
diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc
index 3df760d159..25be448c8d 100644
--- a/tensorflow/compiler/xla/service/hlo_scheduling.cc
+++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc
@@ -72,6 +72,13 @@ class ListScheduler {
     return scheduler.CreateSchedule();
   }
 
+  // Returns whether the memory used by the given HLO should be ignored by the
+  // scheduling heuristic.
+  static bool IgnoreInstruction(const HloInstruction& instruction) {
+    return instruction.opcode() == HloOpcode::kParameter ||
+           instruction.opcode() == HloOpcode::kConstant;
+  }
+
  private:
   // The scheduling priority of an instruction is first the number of bytes
   // freed by scheduling the instruction, and second (tie-breaker) by the number
@@ -127,9 +134,8 @@ class ListScheduler {
 
   // Returns whether the memory used by the given buffer should be ignored by
   // the scheduling heuristic.
-  bool IgnoreBuffer(const LogicalBuffer& buffer) {
-    return buffer.instruction()->opcode() == HloOpcode::kParameter ||
-           buffer.instruction()->opcode() == HloOpcode::kConstant;
+  static bool IgnoreBuffer(const LogicalBuffer& buffer) {
+    return IgnoreInstruction(*buffer.instruction());
   }
 
   // An entry in the worklist used by CreateSchedule.  Corresponds to one
@@ -306,6 +312,11 @@ StatusOr> RunDFSMemoryScheduler(
   tensorflow::gtl::FlatMap extra_users;
   tensorflow::gtl::FlatMap total_sizes;
   for (const HloInstruction* hlo : computation.MakeInstructionPostOrder()) {
+    if (ListScheduler::IgnoreInstruction(*hlo)) {
+      extra_users[hlo] = 0;
+      total_sizes[hlo] = 0;
+      continue;
+    }
     extra_users[hlo] = hlo->users().empty() ? 0 : hlo->users().size() - 1;
     total_sizes[hlo] = SumLogicalBufferSizes(
         points_to_analysis.GetBuffersDefinedByInstruction(hlo), size_function);
-- 
GitLab


From 1019621df2b8e7f516be13de3fbb4fb2833a686a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 14:03:01 -0700
Subject: [PATCH 072/294] Automated g4 rollback of changelist 166487124

PiperOrigin-RevId: 166899472
---
 tensorflow/core/kernels/BUILD                 |  10 +-
 tensorflow/core/kernels/adjust_hsv_gpu.cu.h   | 146 ++++++++++++++++++
 .../core/kernels/adjust_hue_op_gpu.cu.cc      | 100 +-----------
 .../core/kernels/adjust_saturation_op.cc      |  38 +++++
 .../core/kernels/adjust_saturation_op.h       |  40 +++++
 .../kernels/adjust_saturation_op_gpu.cu.cc    |  44 ++++++
 tensorflow/python/ops/image_ops_test.py       |  14 +-
 7 files changed, 287 insertions(+), 105 deletions(-)
 create mode 100644 tensorflow/core/kernels/adjust_hsv_gpu.cu.h
 create mode 100644 tensorflow/core/kernels/adjust_saturation_op.h
 create mode 100644 tensorflow/core/kernels/adjust_saturation_op_gpu.cu.cc

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b024cefb34..cf089a40ba 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1837,16 +1837,22 @@ tf_kernel_library(
     deps = IMAGE_DEPS,
 )
 
+cc_library(
+    name = "adjust_hsv_gpu_lib",
+    hdrs = ["adjust_hsv_gpu.cu.h"],
+    deps = ["//tensorflow/core:framework"],
+)
+
 tf_kernel_library(
     name = "adjust_hue_op",
     prefix = "adjust_hue_op",
-    deps = IMAGE_DEPS,
+    deps = IMAGE_DEPS + [":adjust_hsv_gpu_lib"],
 )
 
 tf_kernel_library(
     name = "adjust_saturation_op",
     prefix = "adjust_saturation_op",
-    deps = IMAGE_DEPS,
+    deps = IMAGE_DEPS + [":adjust_hsv_gpu_lib"],
 )
 
 tf_kernel_library(
diff --git a/tensorflow/core/kernels/adjust_hsv_gpu.cu.h b/tensorflow/core/kernels/adjust_hsv_gpu.cu.h
new file mode 100644
index 0000000000..c160ce2c33
--- /dev/null
+++ b/tensorflow/core/kernels/adjust_hsv_gpu.cu.h
@@ -0,0 +1,146 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_ADJUST_HSV_GPU_CU_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_ADJUST_HSV_GPU_CU_H_
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+namespace internal {
+
+typedef struct RgbTuple {
+  float r;
+  float g;
+  float b;
+} RgbTuple;
+
+typedef struct HsvTuple {
+  float h;
+  float s;
+  float v;
+} HsvTuple;
+
+inline __device__ HsvTuple rgb2hsv_cuda(const float r, const float g,
+                                        const float b) {
+  HsvTuple tuple;
+  const float M = fmaxf(r, fmaxf(g, b));
+  const float m = fminf(r, fminf(g, b));
+  const float chroma = M - m;
+  float h = 0.0f, s = 0.0f;
+  // hue
+  if (chroma > 0.0f) {
+    if (M == r) {
+      const float num = (g - b) / chroma;
+      const float sign = copysignf(1.0f, num);
+      h = ((sign < 0.0f) * 6.0f + sign * fmodf(sign * num, 6.0f)) / 6.0f;
+    } else if (M == g) {
+      h = ((b - r) / chroma + 2.0f) / 6.0f;
+    } else {
+      h = ((r - g) / chroma + 4.0f) / 6.0f;
+    }
+  } else {
+    h = 0.0f;
+  }
+  // saturation
+  if (M > 0.0) {
+    s = chroma / M;
+  } else {
+    s = 0.0f;
+  }
+  tuple.h = h;
+  tuple.s = s;
+  tuple.v = M;
+  return tuple;
+}
+
+inline __device__ RgbTuple hsv2rgb_cuda(const float h, const float s,
+                                        const float v) {
+  RgbTuple tuple;
+  const float new_h = h * 6.0f;
+  const float chroma = v * s;
+  const float x = chroma * (1.0f - fabsf(fmodf(new_h, 2.0f) - 1.0f));
+  const float new_m = v - chroma;
+  const bool between_0_and_1 = new_h >= 0.0f && new_h < 1.0f;
+  const bool between_1_and_2 = new_h >= 1.0f && new_h < 2.0f;
+  const bool between_2_and_3 = new_h >= 2.0f && new_h < 3.0f;
+  const bool between_3_and_4 = new_h >= 3.0f && new_h < 4.0f;
+  const bool between_4_and_5 = new_h >= 4.0f && new_h < 5.0f;
+  const bool between_5_and_6 = new_h >= 5.0f && new_h < 6.0f;
+  tuple.r = chroma * (between_0_and_1 || between_5_and_6) +
+            x * (between_1_and_2 || between_4_and_5) + new_m;
+  tuple.g = chroma * (between_1_and_2 || between_2_and_3) +
+            x * (between_0_and_1 || between_3_and_4) + new_m;
+  tuple.b = chroma * (between_3_and_4 || between_4_and_5) +
+            x * (between_2_and_3 || between_5_and_6) + new_m;
+  return tuple;
+}
+
+template 
+__global__ void adjust_hsv_nhwc(const int64 number_elements,
+                                const float* const __restrict__ input,
+                                float* const output,
+                                const float* const hue_delta,
+                                const float* const saturation_scale,
+                                const float* const value_scale) {
+  // multiply by 3 since we're dealing with contiguous RGB bytes for each pixel
+  // (NHWC)
+  const int64 idx = (blockDim.x * blockIdx.x + threadIdx.x) * 3;
+  // bounds check
+  if (idx > number_elements - 1) {
+    return;
+  }
+  if (!AdjustHue && !AdjustSaturation && !AdjustV) {
+    output[idx] = input[idx];
+    output[idx + 1] = input[idx + 1];
+    output[idx + 2] = input[idx + 2];
+    return;
+  }
+  const HsvTuple hsv = rgb2hsv_cuda(input[idx], input[idx + 1], input[idx + 2]);
+  float new_h = hsv.h;
+  float new_s = hsv.s;
+  float new_v = hsv.v;
+  // hue adjustment
+  if (AdjustHue) {
+    const float delta = *hue_delta;
+    new_h = fmodf(hsv.h + delta, 1.0f);
+    if (new_h < 0.0f) {
+      new_h = fmodf(1.0f + new_h, 1.0f);
+    }
+  }
+  // saturation adjustment
+  if (AdjustSaturation && saturation_scale != nullptr) {
+    const float scale = *saturation_scale;
+    new_s = fminf(1.0f, fmaxf(0.0f, hsv.s * scale));
+  }
+  // value adjustment
+  if (AdjustV && value_scale != nullptr) {
+    const float scale = *value_scale;
+    new_v = hsv.v * scale;
+  }
+  const RgbTuple rgb = hsv2rgb_cuda(new_h, new_s, new_v);
+  output[idx] = rgb.r;
+  output[idx + 1] = rgb.g;
+  output[idx + 2] = rgb.b;
+}
+
+}  // namespace internal
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_ADJUST_HSV_GPU_CU_H_
diff --git a/tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc b/tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc
index 865583c1c3..a4fe5f755c 100644
--- a/tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc
@@ -16,104 +16,11 @@ limitations under the License.
 
 #define EIGEN_USE_GPU
 
-#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/kernels/adjust_hsv_gpu.cu.h"
 #include "tensorflow/core/kernels/adjust_hue_op.h"
 #include "tensorflow/core/util/cuda_kernel_helper.h"
 
 namespace tensorflow {
-namespace internal {
-
-namespace {
-typedef struct RgbTuple {
-  float r;
-  float g;
-  float b;
-} RgbTuple;
-
-typedef struct HsvTuple {
-  float h;
-  float s;
-  float v;
-} HsvTuple;
-}  // namespace
-
-__device__ HsvTuple rgb2hsv_cuda(const float r, const float g, const float b) {
-  HsvTuple tuple;
-  const float M = fmaxf(r, fmaxf(g, b));
-  const float m = fminf(r, fminf(g, b));
-  const float chroma = M - m;
-  float h = 0.0f, s = 0.0f;
-  // hue
-  if (chroma > 0.0f) {
-    if (M == r) {
-      const float num = (g - b) / chroma;
-      const float sign = copysignf(1.0f, num);
-      h = ((sign < 0.0f) * 6.0f + sign * fmodf(sign * num, 6.0f)) / 6.0f;
-    } else if (M == g) {
-      h = ((b - r) / chroma + 2.0f) / 6.0f;
-    } else {
-      h = ((r - g) / chroma + 4.0f) / 6.0f;
-    }
-  } else {
-    h = 0.0f;
-  }
-  // saturation
-  if (M > 0.0) {
-    s = chroma / M;
-  } else {
-    s = 0.0f;
-  }
-  tuple.h = h;
-  tuple.s = s;
-  tuple.v = M;
-  return tuple;
-}
-
-__device__ RgbTuple hsv2rgb_cuda(const float h, const float s, const float v) {
-  RgbTuple tuple;
-  const float new_h = h * 6.0f;
-  const float chroma = v * s;
-  const float x = chroma * (1.0f - fabsf(fmodf(new_h, 2.0f) - 1.0f));
-  const float new_m = v - chroma;
-  const bool between_0_and_1 = new_h >= 0.0f && new_h < 1.0f;
-  const bool between_1_and_2 = new_h >= 1.0f && new_h < 2.0f;
-  const bool between_2_and_3 = new_h >= 2.0f && new_h < 3.0f;
-  const bool between_3_and_4 = new_h >= 3.0f && new_h < 4.0f;
-  const bool between_4_and_5 = new_h >= 4.0f && new_h < 5.0f;
-  const bool between_5_and_6 = new_h >= 5.0f && new_h < 6.0f;
-  tuple.r = chroma * (between_0_and_1 || between_5_and_6) +
-            x * (between_1_and_2 || between_4_and_5) + new_m;
-  tuple.g = chroma * (between_1_and_2 || between_2_and_3) +
-            x * (between_0_and_1 || between_3_and_4) + new_m;
-  tuple.b = chroma * (between_3_and_4 || between_4_and_5) +
-            x * (between_2_and_3 || between_5_and_6) + new_m;
-  return tuple;
-}
-
-__global__ void adjust_hue_nhwc(const int64 number_elements,
-                                const float* const __restrict__ input,
-                                float* const output,
-                                const float* const hue_delta) {
-  // multiply by 3 since we're dealing with contiguous RGB bytes for each pixel
-  // (NHWC)
-  const int64 idx = (blockDim.x * blockIdx.x + threadIdx.x) * 3;
-  // bounds check
-  if (idx > number_elements - 1) {
-    return;
-  }
-  const float delta = hue_delta[0];
-  const HsvTuple hsv = rgb2hsv_cuda(input[idx], input[idx + 1], input[idx + 2]);
-  // hue adjustment
-  float new_h = fmodf(hsv.h + delta, 1.0f);
-  if (new_h < 0.0f) {
-    new_h = fmodf(1.0f + new_h, 1.0f);
-  }
-  const RgbTuple rgb = hsv2rgb_cuda(new_h, hsv.s, hsv.v);
-  output[idx] = rgb.r;
-  output[idx + 1] = rgb.g;
-  output[idx + 2] = rgb.b;
-}
-}  // namespace internal
 
 namespace functor {
 
@@ -126,8 +33,9 @@ void AdjustHueGPU::operator()(GPUDevice* device, const int64 number_of_elements,
   const int threads_per_block = config.thread_per_block;
   const int block_count =
       (number_of_elements + threads_per_block - 1) / threads_per_block;
-  internal::adjust_hue_nhwc<<>>(
-      number_of_elements, input, output, delta);
+  internal::adjust_hsv_nhwc
+      <<>>(
+          number_of_elements, input, output, delta, nullptr, nullptr);
 }
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/adjust_saturation_op.cc b/tensorflow/core/kernels/adjust_saturation_op.cc
index 34a6581514..4643d4e6ef 100644
--- a/tensorflow/core/kernels/adjust_saturation_op.cc
+++ b/tensorflow/core/kernels/adjust_saturation_op.cc
@@ -12,6 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#define EIGEN_USE_THREADS
+
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif
+
+#include "tensorflow/core/kernels/adjust_saturation_op.h"
 #include 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -206,4 +213,35 @@ class AdjustSaturationOp : public AdjustSaturationOpBase {
 REGISTER_KERNEL_BUILDER(Name("AdjustSaturation").Device(DEVICE_CPU),
                         AdjustSaturationOp);
 
+#if GOOGLE_CUDA
+template <>
+class AdjustSaturationOp : public AdjustSaturationOpBase {
+ public:
+  explicit AdjustSaturationOp(OpKernelConstruction* context)
+      : AdjustSaturationOpBase(context) {}
+
+  void DoCompute(OpKernelContext* context,
+                 const ComputeOptions& options) override {
+    const Tensor* input = options.input;
+    const Tensor* scale = options.scale;
+    Tensor* output = options.output;
+    const int64 number_of_elements = input->NumElements();
+    GPUDevice device = context->eigen_gpu_device();
+    const auto stream = device.stream();
+    OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
+    if (number_of_elements > 0) {
+      const float* input_data = input->flat().data();
+      const float* scale_data = scale->flat().data();
+      float* const output_data = output->flat().data();
+      functor::AdjustSaturationGPU()(&device, number_of_elements, input_data,
+                                     scale_data, output_data);
+    }
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("AdjustSaturation").Device(DEVICE_GPU),
+                        AdjustSaturationOp);
+
+#endif
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/adjust_saturation_op.h b/tensorflow/core/kernels/adjust_saturation_op.h
new file mode 100644
index 0000000000..05c45c07c3
--- /dev/null
+++ b/tensorflow/core/kernels/adjust_saturation_op.h
@@ -0,0 +1,40 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef _TENSORFLOW_CORE_KERNELS_ADJUST_SATURATION_OP_H
+#define _TENSORFLOW_CORE_KERNELS_ADJUST_SATURATION_OP_H
+
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+
+struct AdjustSaturationGPU {
+  void operator()(GPUDevice* device, const int64 number_of_elements,
+                  const float* const input, const float* const scale,
+                  float* const output);
+};
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
+#endif  // _TENSORFLOW_CORE_KERNELS_ADJUST_SATURATION_OP_H
diff --git a/tensorflow/core/kernels/adjust_saturation_op_gpu.cu.cc b/tensorflow/core/kernels/adjust_saturation_op_gpu.cu.cc
new file mode 100644
index 0000000000..37cfb26a47
--- /dev/null
+++ b/tensorflow/core/kernels/adjust_saturation_op_gpu.cu.cc
@@ -0,0 +1,44 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/adjust_hsv_gpu.cu.h"
+#include "tensorflow/core/kernels/adjust_saturation_op.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+namespace functor {
+
+void AdjustSaturationGPU::operator()(GPUDevice* device,
+                                     const int64 number_of_elements,
+                                     const float* const input,
+                                     const float* const scale,
+                                     float* const output) {
+  const auto stream = device->stream();
+  const CudaLaunchConfig config =
+      GetCudaLaunchConfig(number_of_elements, *device);
+  const int threads_per_block = config.thread_per_block;
+  const int block_count =
+      (number_of_elements + threads_per_block - 1) / threads_per_block;
+  internal::adjust_hsv_nhwc
+      <<>>(
+          number_of_elements, input, output, nullptr, scale, nullptr);
+}
+}  // namespace functor
+}  // namespace tensorflow
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 19a65037e9..6356d88403 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -386,11 +386,11 @@ class AdjustHueBenchmark(test.Benchmark):
           sess.run(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
-    tag = "%s" % (cpu_count) if cpu_count is not None else "_all"
-    print("benchmarkAdjustHue_299_299_3_cpu%s step_time: %.2f us" %
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkAdjustHue_299_299_3_%s step_time: %.2f us" %
           (tag, step_time * 1e6))
     self.report_benchmark(
-        name="benchmarkAdjustHue_299_299_3_cpu%s" % (tag),
+        name="benchmarkAdjustHue_299_299_3_%s" % (tag),
         iters=benchmark_rounds,
         wall_time=step_time)
 
@@ -432,11 +432,11 @@ class AdjustSaturationBenchmark(test.Benchmark):
           sess.run(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
-    tag = "%s" % (cpu_count) if cpu_count is not None else "_all"
-    print("benchmarkAdjustSaturation_599_599_3_cpu%s step_time: %.2f us" %
+    tag = device + "_%s" % (cpu_count if cpu_count is not None else "_all")
+    print("benchmarkAdjustSaturation_299_299_3_%s step_time: %.2f us" %
           (tag, step_time * 1e6))
     self.report_benchmark(
-        name="benchmarkAdjustSaturation_599_599_3_cpu%s" % (tag),
+        name="benchmarkAdjustSaturation_299_299_3_%s" % (tag),
         iters=benchmark_rounds,
         wall_time=step_time)
 
@@ -704,7 +704,7 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
         "gb_same",
         "rgb_same",
     ]
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       for x_shape in x_shapes:
         for test_style in test_styles:
           x_np = np.random.rand(*x_shape) * 255.
-- 
GitLab


From dee5007d2440d87592e7359c87bcd2d3b40f914b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 14:13:12 -0700
Subject: [PATCH 073/294] Better handling of default dtypes for eager.

PiperOrigin-RevId: 166901236
---
 tensorflow/python/eager/BUILD       |  1 +
 tensorflow/python/eager/execute.py  |  7 +------
 tensorflow/python/eager/ops_test.py | 14 ++++++++++++++
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index ecb0ca9dfd..4bb810a6c8 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -405,6 +405,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":context",
+        ":execute",
         ":tensor",
         ":test",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py
index 2223e9833a..2b5a76ca12 100644
--- a/tensorflow/python/eager/execute.py
+++ b/tensorflow/python/eager/execute.py
@@ -188,17 +188,12 @@ def args_to_matching_eager(l, default_dtype=None):
       dtype = t.dtype
       break
 
-  if dtype is None:
-    # TODO(josh11b): At the moment, I don't think this can fail, but at some
-    # point we likely should have some logic to prevent bad conversions.
-    dtype = default_dtype
-
   if dtype is None:
     # Infer a dtype based on the first value, and use that dtype for the
     # remaining values.
     ret = []
     for t in l:
-      ret.append(ops.convert_to_tensor(t, dtype))
+      ret.append(ops.convert_to_tensor(t, dtype, preferred_dtype=default_dtype))
       if dtype is None:
         dtype = ret[-1].dtype
   else:
diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py
index d1cb3508cf..ffd5b39b85 100644
--- a/tensorflow/python/eager/ops_test.py
+++ b/tensorflow/python/eager/ops_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.eager import context
+from tensorflow.python.eager import execute
 from tensorflow.python.eager import tensor
 from tensorflow.python.eager import test
 from tensorflow.python.framework import dtypes
@@ -295,6 +296,19 @@ class TargetTest(test_util.TensorFlowTestCase):
     self.assertLess(x.numpy(), 6)
     self.assertGreaterEqual(x.numpy(), 5)
 
+  def testArgsToMatchingEagerDefault(self):
+    # Uses default
+    t, r = execute.args_to_matching_eager([[3, 4]], dtypes.int32)
+    self.assertEquals(t, dtypes.int32)
+    self.assertEquals(r[0].dtype, dtypes.int32)
+    t, r = execute.args_to_matching_eager([[3, 4]], dtypes.int64)
+    self.assertEquals(t, dtypes.int64)
+    self.assertEquals(r[0].dtype, dtypes.int64)
+    # Doesn't use default
+    t, r = execute.args_to_matching_eager([['string', 'arg']], dtypes.int32)
+    self.assertEquals(t, dtypes.string)
+    self.assertEquals(r[0].dtype, dtypes.string)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From e8ce634f4a42298a8a386635f873017b6e2df8e3 Mon Sep 17 00:00:00 2001
From: Igor Ganichev 
Date: Tue, 29 Aug 2017 14:42:01 -0700
Subject: [PATCH 074/294] Add ArgDefCase conversion to tensorflow::str_util

PiperOrigin-RevId: 166905993
---
 tensorflow/core/lib/strings/str_util.cc      | 52 ++++++++++++++++++++
 tensorflow/core/lib/strings/str_util.h       | 11 +++++
 tensorflow/core/lib/strings/str_util_test.cc | 32 ++++++++++++
 3 files changed, 95 insertions(+)

diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
index c68e14f09f..1159304724 100644
--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -248,6 +248,58 @@ string Uppercase(StringPiece s) {
   return result;
 }
 
+string ArgDefCase(StringPiece s) {
+  const int n = s.size();
+
+  // Compute the size of resulting string.
+  // Number of extra underscores we will need to add.
+  int extra_us = 0;
+  // Number of non-alpha chars in the beginning to skip.
+  int to_skip = 0;
+  for (int i = 0; i < n; ++i) {
+    // If we are skipping and current letter is non-alpha, skip it as well
+    if (i == to_skip && !isalpha(s[i])) {
+      ++to_skip;
+      continue;
+    }
+
+    // If we are here, we are not skipping any more.
+    // If this letter is upper case, not the very first char in the
+    // resulting string, and previous letter isn't replaced with an underscore,
+    // we will need to insert an underscore.
+    if (isupper(s[i]) && i != to_skip && i > 0 && isalnum(s[i - 1])) {
+      ++extra_us;
+    }
+  }
+
+  // Initialize result with all '_'s. There is no string
+  // constructor that does not initialize memory.
+  string result(n + extra_us - to_skip, '_');
+  // i - index into s
+  // j - index into result
+  for (int i = to_skip, j = 0; i < n; ++i, ++j) {
+    DCHECK_LT(j, result.size());
+    char c = s[i];
+    // If c is not alphanumeric, we don't need to do anything
+    // since there is already an underscore in its place.
+    if (isalnum(c)) {
+      if (isupper(c)) {
+        // If current char is upper case, we might need to insert an
+        // underscore.
+        if (i != to_skip) {
+          DCHECK_GT(j, 0);
+          if (result[j - 1] != '_') ++j;
+        }
+        result[j] = tolower(c);
+      } else {
+        result[j] = c;
+      }
+    }
+  }
+
+  return result;
+}
+
 void TitlecaseString(string* s, StringPiece delimiters) {
   bool upper = true;
   for (string::iterator ss = s->begin(); ss != s->end(); ++ss) {
diff --git a/tensorflow/core/lib/strings/str_util.h b/tensorflow/core/lib/strings/str_util.h
index 669f0d3c52..8cea0f0718 100644
--- a/tensorflow/core/lib/strings/str_util.h
+++ b/tensorflow/core/lib/strings/str_util.h
@@ -81,6 +81,17 @@ string Lowercase(StringPiece s);
 // Return upper-cased version of s.
 string Uppercase(StringPiece s);
 
+// Converts "^2ILoveYou!" to "i_love_you_". More specifically:
+// - converts all non-alphanumeric characters to underscores
+// - replaces each occurence of a capital letter (except the very
+//   first character and if there is already an '_' before it) with '_'
+//   followed by this letter in lower case
+// - Skips leading non-alpha characters
+// This method is useful for producing strings matching "[a-z][a-z0-9_]*"
+// as required by OpDef.ArgDef.name. The resulting string is either empty or
+// matches this regex.
+string ArgDefCase(StringPiece s);
+
 // Capitalize first character of each word in "*s".  "delimiters" is a
 // set of characters that can be used as word boundaries.
 void TitlecaseString(string* s, StringPiece delimiters);
diff --git a/tensorflow/core/lib/strings/str_util_test.cc b/tensorflow/core/lib/strings/str_util_test.cc
index 040f7447e4..5c735a87a3 100644
--- a/tensorflow/core/lib/strings/str_util_test.cc
+++ b/tensorflow/core/lib/strings/str_util_test.cc
@@ -338,6 +338,38 @@ TEST(Uppercase, Basic) {
   EXPECT_EQ("HELLO WORLD", str_util::Uppercase("Hello World"));
 }
 
+TEST(SnakeCase, Basic) {
+  EXPECT_EQ("", str_util::ArgDefCase(""));
+  EXPECT_EQ("", str_util::ArgDefCase("!"));
+  EXPECT_EQ("", str_util::ArgDefCase("5"));
+  EXPECT_EQ("", str_util::ArgDefCase("!:"));
+  EXPECT_EQ("", str_util::ArgDefCase("5-5"));
+  EXPECT_EQ("", str_util::ArgDefCase("_!"));
+  EXPECT_EQ("", str_util::ArgDefCase("_5"));
+  EXPECT_EQ("a", str_util::ArgDefCase("_a"));
+  EXPECT_EQ("a", str_util::ArgDefCase("_A"));
+  EXPECT_EQ("i", str_util::ArgDefCase("I"));
+  EXPECT_EQ("i", str_util::ArgDefCase("i"));
+  EXPECT_EQ("i_", str_util::ArgDefCase("I%"));
+  EXPECT_EQ("i_", str_util::ArgDefCase("i%"));
+  EXPECT_EQ("i", str_util::ArgDefCase("%I"));
+  EXPECT_EQ("i", str_util::ArgDefCase("-i"));
+  EXPECT_EQ("i", str_util::ArgDefCase("3i"));
+  EXPECT_EQ("i", str_util::ArgDefCase("32i"));
+  EXPECT_EQ("i3", str_util::ArgDefCase("i3"));
+  EXPECT_EQ("i_a3", str_util::ArgDefCase("i_A3"));
+  EXPECT_EQ("i_i", str_util::ArgDefCase("II"));
+  EXPECT_EQ("i_i", str_util::ArgDefCase("I_I"));
+  EXPECT_EQ("i__i", str_util::ArgDefCase("I__I"));
+  EXPECT_EQ("i_i_32", str_util::ArgDefCase("II-32"));
+  EXPECT_EQ("ii_32", str_util::ArgDefCase("Ii-32"));
+  EXPECT_EQ("hi_there", str_util::ArgDefCase("HiThere"));
+  EXPECT_EQ("hi_hi", str_util::ArgDefCase("Hi!Hi"));
+  EXPECT_EQ("hi_hi", str_util::ArgDefCase("HiHi"));
+  EXPECT_EQ("hihi", str_util::ArgDefCase("Hihi"));
+  EXPECT_EQ("hi_hi", str_util::ArgDefCase("Hi_Hi"));
+}
+
 TEST(TitlecaseString, Basic) {
   string s = "sparse_lookup";
   str_util::TitlecaseString(&s, "_");
-- 
GitLab


From b386597e71178378405ca4bf9e0b338d4a246290 Mon Sep 17 00:00:00 2001
From: Igor Ganichev 
Date: Tue, 29 Aug 2017 14:43:47 -0700
Subject: [PATCH 075/294] Make node const in tensorflow::OutputTensor

PiperOrigin-RevId: 166906312
---
 tensorflow/core/graph/graph.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index bd388d9065..51ede642d2 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -261,10 +261,10 @@ class Node {
 // that a single `OutputTensor` can correspond to multiple `Edge`s if the output
 // is consumed by multiple destination nodes.
 struct OutputTensor {
-  Node* node;
+  const Node* node;
   int index;
 
-  OutputTensor(Node* n, int i) : node(n), index(i) {}
+  OutputTensor(const Node* n, int i) : node(n), index(i) {}
   OutputTensor() : node(nullptr), index(0) {}
 };
 
-- 
GitLab


From 232a4f3749e2534adb7695a1d24b8b0fbd985039 Mon Sep 17 00:00:00 2001
From: Ben Lee 
Date: Tue, 29 Aug 2017 15:03:29 -0700
Subject: [PATCH 076/294] Add QUINT16 support to Fill

PiperOrigin-RevId: 166909611
---
 tensorflow/core/kernels/constant_op.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index f0052619f0..b4b37dd4b8 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -219,6 +219,7 @@ TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
 // TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
 // the conversion from uint8 to quint8.
 REGISTER_KERNEL(CPU, quint8);
+REGISTER_KERNEL(CPU, quint16);
 #undef REGISTER_CPU_KERNEL
 
 #ifdef TENSORFLOW_USE_SYCL
-- 
GitLab


From f282bb142f8d170549f6708f334bed7f40dd029e Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Tue, 29 Aug 2017 15:06:31 -0700
Subject: [PATCH 077/294] Fix bug with second derivatives in eager mode.

Improper wrapping and unwrapping of tensors lead to tracing being dropped.

PiperOrigin-RevId: 166910119
---
 tensorflow/python/eager/backprop.py        |  3 ++-
 tensorflow/python/eager/backprop_test.py   | 13 +++++++++++
 tensorflow/python/eager/tape.py            | 26 +++++++++++++---------
 tensorflow/python/framework/tensor_util.py |  5 +++--
 tensorflow/python/ops/array_grad.py        |  3 ++-
 tensorflow/python/ops/math_grad.py         |  6 +++++
 tensorflow/python/ops/nn_grad.py           |  4 ++++
 7 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 82ffaccde7..b0e8a41cb5 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import threading
 
+from autograd import container_types
 from autograd import convenience_wrappers
 from autograd import core as ag_core
 
@@ -150,7 +151,7 @@ def _record_gradient(op_name, inputs, attrs, results, name):
   def grad_fn(*outputs):
     """Generated gradient function."""
     tensors = inputs + result_copies + list(outputs)
-    tensors = [ag_core.getval(x) for x in tensors]
+    tensors = container_types.make_sequence(tape.EagerList, *tensors)
     result = _magic_gradient_function(op_name, attrs, len(inputs),
                                       num_outputs, *(tensors))
     if _tracing:
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 863a43f859..0b9a68db67 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -160,6 +160,19 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(second, [0])(f)[0]
     self.assertAllEqual([[0.0]], grad.numpy())
 
+  def testGradGrad(self):
+
+    def sq(x):
+      return x * x
+
+    def grad(x):
+      value = backprop.gradients_function(sq, [0])(x)[0]
+      return value
+
+    gradgrad = backprop.gradients_function(grad, [0])
+
+    self.assertAllEqual(gradgrad(tensor.Tensor(3.0))[0].numpy(), 2.0)
+
   def testGPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py
index f2915eba59..6fd19aee45 100644
--- a/tensorflow/python/eager/tape.py
+++ b/tensorflow/python/eager/tape.py
@@ -101,6 +101,9 @@ class NoneVSpace(ag_core.VSpace):
   def __init__(self, _):
     self.size = 0
 
+  def zeros(self):
+    return 0
+
 
 ag_core.register_vspace(NoneVSpace, type(None))
 
@@ -197,28 +200,32 @@ class _EagerSequenceVSpace(container_types.SequenceVSpace):
     return True
 
 
-class _EagerList(list):
-  """Type used to bypass SequenceVSpace."""
+class EagerList(list):
+  """Type used to bypass SequenceVSpace.
+
+  SequenceVSpace has a very strict equality check which does not match
+  tensorflow semantics.
+  """
 
   def __init__(self, value):
-    super(_EagerList, self).__init__(value)
+    super(EagerList, self).__init__(value)
     for v in value:
       assert not ag_core.isnode(v)
 
-ag_core.register_vspace(_EagerSequenceVSpace, _EagerList)
-ag_core.register_node(_EagerSequenceNode, _EagerList)
+ag_core.register_vspace(_EagerSequenceVSpace, EagerList)
+ag_core.register_node(_EagerSequenceNode, EagerList)
 
 
 @ag_core.primitive
 def _record_operation(output_tensors, input_tensors, side_outputs,
                       backward_function):
   del input_tensors, side_outputs, backward_function
-  return _EagerList(output_tensors)
+  return EagerList(output_tensors)
 
 
 def record_operation(o, i, s, b):
   """Primitive to trigger autograd tracing on outputs from inputs."""
-  inputs = container_types.make_sequence(_EagerList, *i)
+  inputs = container_types.make_sequence(EagerList, *i)
   return _record_operation(o, inputs, s, b)
 
 
@@ -227,9 +234,8 @@ def _record_operation_vjp(g, ans, vs, gvs, output_tensors, input_tensors,
   """Gradient for _record_operation."""
   del ans, vs, gvs, output_tensors, input_tensors
   backward_args = tuple(g) + tuple(side_outputs)
-  if ag_core.isnode(backward_args):
-    backward_args = list(backward_args)
+  backward_args = container_types.make_sequence(EagerList, *backward_args)
   tensors = nest.flatten(backward_function(*backward_args))
-  return _EagerList([ag_core.getval(t) for t in tensors])
+  return container_types.make_sequence(EagerList, *tensors)
 
 _record_operation.defvjp(_record_operation_vjp, argnum=1)
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 3bdc721402..686e6a9be4 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from autograd import core as ag_core
 import numpy as np
 import six
 
@@ -605,7 +606,7 @@ def ShapeEquals(tensor_proto, shape):
 
 def _ConstantValue(tensor, partial):
   # TODO(touts): Support Variables?
-  if not isinstance(tensor, ops.Tensor):
+  if not isinstance(ag_core.getval(tensor), ops.Tensor):
     raise TypeError("tensor is not a Tensor")
   if tensor.op.type == "Const":
     return MakeNdarray(tensor.op.get_attr("value"))
@@ -719,7 +720,7 @@ def constant_value(tensor, partial=False):  # pylint: disable=invalid-name
   Raises:
     TypeError: if tensor is not an ops.Tensor.
   """
-  if isinstance(tensor, ops.EagerTensor):
+  if isinstance(ag_core.getval(tensor), ops.EagerTensor):
     return tensor.numpy()
   ret = _ConstantValue(tensor, partial)
   if ret is not None:
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index 55b1a7296d..bdc1f40615 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -534,7 +534,8 @@ def _TileGrad(op, grad):
   axes = math_ops.range(0, array_ops.size(split_shape), 2)
   input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes)
   # Fix shape inference
-  input_grad.set_shape(op.inputs[0].get_shape())
+  if context.in_graph_mode():
+    input_grad.set_shape(op.inputs[0].get_shape())
   return [input_grad, None]
 
 
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 3d70465e68..bf8dc17298 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -750,6 +750,12 @@ def _FloorDivGrad(_, unused_grad):
   return None, None
 
 
+@ops.RegisterGradient("FloorMod")
+def _FloorModGrad(_, unused_grad):
+  """The gradient for the FloorMod operator."""
+  return None, None
+
+
 @ops.RegisterGradient("TruncateDiv")
 def _TruncateDivGrad(_, unused_grad):
   return None, None
diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py
index df5e66664f..45d80b43f1 100644
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import context
+from tensorflow.python.eager import tensor
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
@@ -420,6 +422,8 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad):
 
   def IsZero(g):
     # Some introspection to check if the gradient is feeding zeros
+    if context.in_eager_mode():
+      return isinstance(g, tensor.LazyZero)
     if g.op.type in ("ZerosLike", "Zeros"):
       return True
     const_fill_value = tensor_util.constant_value(g)
-- 
GitLab


From 0599901f020dd2e74b93bc4e12364027fbde1f1c Mon Sep 17 00:00:00 2001
From: Mingxing Tan 
Date: Tue, 29 Aug 2017 15:08:38 -0700
Subject: [PATCH 078/294] Add extract_jpeg_shape such that we can get the image
 shape without actually decoding the image.

PiperOrigin-RevId: 166910421
---
 tensorflow/core/kernels/BUILD                 |  8 ++
 .../core/kernels/extract_jpeg_shape_op.cc     | 77 +++++++++++++++++++
 tensorflow/core/ops/image_ops.cc              | 22 ++++++
 tensorflow/core/ops/image_ops_test.cc         | 10 +++
 tensorflow/python/ops/image_ops.py            |  1 +
 tensorflow/python/ops/image_ops_test.py       | 20 +++++
 .../tools/api/golden/tensorflow.image.pbtxt   |  4 +
 7 files changed, 142 insertions(+)
 create mode 100644 tensorflow/core/kernels/extract_jpeg_shape_op.cc

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index cf089a40ba..79a7b8a8b9 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1807,6 +1807,7 @@ cc_library(
         ":draw_bounding_box_op",
         ":encode_jpeg_op",
         ":encode_png_op",
+        ":extract_jpeg_shape_op",
         ":non_max_suppression_op",
         ":random_crop_op",
         ":resize_area_op",
@@ -1903,6 +1904,12 @@ tf_kernel_library(
     deps = IMAGE_DEPS,
 )
 
+tf_kernel_library(
+    name = "extract_jpeg_shape_op",
+    prefix = "extract_jpeg_shape_op",
+    deps = IMAGE_DEPS,
+)
+
 tf_kernel_library(
     name = "non_max_suppression_op",
     prefix = "non_max_suppression_op",
@@ -4651,6 +4658,7 @@ filegroup(
             "decode_image_op.*",
             "encode_png_op.*",
             "encode_jpeg_op.*",
+            "extract_jpeg_shape_op.*",
             "decode_jpeg_op.*",
             "decode_gif_op.*",
             "identity_reader_op.*",
diff --git a/tensorflow/core/kernels/extract_jpeg_shape_op.cc b/tensorflow/core/kernels/extract_jpeg_shape_op.cc
new file mode 100644
index 0000000000..60d798af56
--- /dev/null
+++ b/tensorflow/core/kernels/extract_jpeg_shape_op.cc
@@ -0,0 +1,77 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/image_ops.cc
+
+#include 
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/jpeg/jpeg_mem.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+// Extract the shape of a JPEG image.
+template 
+class ExtractJpegShapeOp : public OpKernel {
+ public:
+  explicit ExtractJpegShapeOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Get input content.
+    const Tensor& contents = context->input(0);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents.shape()),
+                errors::InvalidArgument("contents must be scalar, got shape ",
+                                        contents.shape().DebugString()));
+    const StringPiece input = contents.scalar()();
+    OP_REQUIRES(context, input.size() <= std::numeric_limits::max(),
+                errors::InvalidArgument("JPEG contents are too large for int: ",
+                                        input.size()));
+
+    // Call GetImageInfo to get image shape.
+    int width, height, components;
+    OP_REQUIRES(
+        context,
+        jpeg::GetImageInfo(input.data(), input.size(), &width, &height,
+                           &components),
+        errors::InvalidArgument("Invalid JPEG data, size ", input.size()));
+    // Allocate tensor and set shape size.
+    Tensor* image_shape = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape({3}), &image_shape));
+    auto image_shape_data = image_shape->tensor();
+    image_shape_data(0) = height;
+    image_shape_data(1) = width;
+    image_shape_data(2) = components;
+  }
+};
+
+#define REGISTER_KERNELS(type)                                      \
+  REGISTER_KERNEL_BUILDER(Name("ExtractJpegShape")                  \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint("output_type"), \
+                          ExtractJpegShapeOp)
+
+TF_CALL_int32(REGISTER_KERNELS);
+TF_CALL_int64(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc
index 1bfa37f5a7..8ddf3561ce 100644
--- a/tensorflow/core/ops/image_ops.cc
+++ b/tensorflow/core/ops/image_ops.cc
@@ -457,6 +457,28 @@ xmp_metadata: If not empty, embed this XMP metadata in the image header.
 contents: 0-D. JPEG-encoded image.
 )doc");
 
+// --------------------------------------------------------------------------
+REGISTER_OP("ExtractJpegShape")
+    .Input("contents: string")
+    .Output("image_shape: output_type")
+    .Attr("output_type: {int32, int64} = DT_INT32")
+    .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
+      c->set_output(0, c->Vector(3));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Extract the shape information of a JPEG-encoded image.
+
+This op only parses the image header, so it is much faster than DecodeJpeg.
+
+contents: 0-D. The JPEG-encoded image.
+image_shape: 1-D. The image shape with format [height, width, channels].
+output_type: (Optional) The output type of the operation (int32 or int64).
+    Defaults to int32.
+)doc");
+
 // --------------------------------------------------------------------------
 REGISTER_OP("AdjustContrast")
     .Input("images: T")
diff --git a/tensorflow/core/ops/image_ops_test.cc b/tensorflow/core/ops/image_ops_test.cc
index ea202edfb3..c757cefdda 100644
--- a/tensorflow/core/ops/image_ops_test.cc
+++ b/tensorflow/core/ops/image_ops_test.cc
@@ -101,6 +101,16 @@ TEST(ImageOpsTest, EncodeImage_ShapeFn) {
   }
 }
 
+TEST(ImageOpsTest, ExtractJpegShape_ShapeFn) {
+  ShapeInferenceTestOp op("ExtractJpegShape");
+
+  // Rank check.
+  INFER_ERROR("Shape must be rank 0 but is rank 1", op, "[1]");
+
+  // Only specify input data. Output must be a 1-D tensor with 3 elements.
+  INFER_OK(op, "?", "[3]");
+}
+
 TEST(ImageOpsTest, Colorspace_ShapeFn) {
   for (const char* op_name : {"HSVToRGB", "RGBToHSV"}) {
     ShapeInferenceTestOp op(op_name);
diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py
index 75c67dcb3c..31485ae9d4 100644
--- a/tensorflow/python/ops/image_ops.py
+++ b/tensorflow/python/ops/image_ops.py
@@ -22,6 +22,7 @@ See the @{$python/image} guide.
 @@decode_gif
 @@decode_jpeg
 @@encode_jpeg
+@@extract_jpeg_shape
 @@decode_png
 @@encode_png
 @@decode_image
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py
index 6356d88403..9e656f0e08 100644
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -2455,6 +2455,26 @@ class JpegTest(test_util.TensorFlowTestCase):
         self.assertEqual(image.get_shape().as_list(),
                          [None, None, channels or None])
 
+  def testExtractJpegShape(self):
+    # Read a real jpeg and verify shape.
+    path = ("tensorflow/core/lib/jpeg/testdata/"
+            "jpeg_merge_test1.jpg")
+    with self.test_session(use_gpu=True) as sess:
+      jpeg = io_ops.read_file(path)
+      # Extract shape without decoding.
+      [image_shape] = sess.run([image_ops.extract_jpeg_shape(jpeg)])
+      self.assertEqual(image_shape.tolist(), [256, 128, 3])
+
+  def testExtractJpegShapeforCmyk(self):
+    # Read a cmyk jpeg image, and verify its shape.
+    path = ("tensorflow/core/lib/jpeg/testdata/"
+            "jpeg_merge_test1_cmyk.jpg")
+    with self.test_session(use_gpu=True) as sess:
+      jpeg = io_ops.read_file(path)
+      [image_shape] = sess.run([image_ops.extract_jpeg_shape(jpeg)])
+      # Cmyk jpeg image has 4 channels.
+      self.assertEqual(image_shape.tolist(), [256, 128, 4])
+
 
 class PngTest(test_util.TensorFlowTestCase):
 
diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt
index 661d6fc586..764ffbb4b7 100644
--- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt
@@ -76,6 +76,10 @@ tf_module {
     name: "extract_glimpse"
     argspec: "args=[\'input\', \'size\', \'offsets\', \'centered\', \'normalized\', \'uniform_noise\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'True\', \'True\', \'None\'], "
   }
+  member_method {
+    name: "extract_jpeg_shape"
+    argspec: "args=[\'contents\', \'output_type\', \'name\'], varargs=None, keywords=None, defaults=[\"\", \'None\'], "
+  }
   member_method {
     name: "flip_left_right"
     argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None"
-- 
GitLab


From 0d655127f44b674bc07a84d69b8f0d767422398b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 15:14:19 -0700
Subject: [PATCH 079/294] Add simple cudaMalloc based allocator useful for
 memory debugging with existing cuda toolchain

PiperOrigin-RevId: 166911293
---
 tensorflow/core/BUILD                         |  2 +
 .../gpu/gpu_cudamalloc_allocator.cc           | 73 +++++++++++++++++++
 .../gpu/gpu_cudamalloc_allocator.h            | 52 +++++++++++++
 .../core/common_runtime/gpu/process_state.cc  | 30 +++++++-
 4 files changed, 155 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
 create mode 100644 tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 03aae9fb74..de9eb057e4 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1926,6 +1926,7 @@ tf_cuda_library(
     name = "gpu_runtime",
     srcs = [
         "common_runtime/gpu/gpu_bfc_allocator.cc",
+        "common_runtime/gpu/gpu_cudamalloc_allocator.cc",
         "common_runtime/gpu/gpu_debug_allocator.cc",
         "common_runtime/gpu/gpu_device.cc",
         "common_runtime/gpu/gpu_device_factory.cc",
@@ -1938,6 +1939,7 @@ tf_cuda_library(
     ],
     hdrs = [
         "common_runtime/gpu/gpu_bfc_allocator.h",
+        "common_runtime/gpu/gpu_cudamalloc_allocator.h",
         "common_runtime/gpu/gpu_debug_allocator.h",
         "common_runtime/gpu/gpu_device.h",
         "common_runtime/gpu/gpu_init.h",
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
new file mode 100644
index 0000000000..70c2d96763
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
@@ -0,0 +1,73 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifdef GOOGLE_CUDA
+#include "cuda/include/cuda.h"
+#include "tensorflow/stream_executor/cuda/cuda_activation.h"
+#endif  // GOOGLE_CUDA
+
+#include "tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h"
+
+#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
+#include "tensorflow/core/platform/stream_executor.h"
+
+namespace gpu = ::perftools::gputools;
+
+namespace tensorflow {
+
+GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator,
+                                               int device_id)
+    : base_allocator_(allocator) {
+  stream_exec_ = GPUMachineManager()->ExecutorForDevice(device_id).ValueOrDie();
+}
+
+GPUcudaMallocAllocator::~GPUcudaMallocAllocator() { delete base_allocator_; }
+
+void* GPUcudaMallocAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
+#ifdef GOOGLE_CUDA
+  // allocate with cudaMalloc
+  gpu::cuda::ScopedActivateExecutorContext scoped_activation{stream_exec_};
+  CUdeviceptr rv = 0;
+  CUresult res = cuMemAlloc(&rv, num_bytes);
+  if (res != CUDA_SUCCESS) {
+    LOG(ERROR) << "cuMemAlloc failed to allocate " << num_bytes;
+    return nullptr;
+  }
+  return reinterpret_cast(rv);
+#else
+  return nullptr;
+#endif  // GOOGLE_CUDA
+}
+void GPUcudaMallocAllocator::DeallocateRaw(void* ptr) {
+#ifdef GOOGLE_CUDA
+  // free with cudaFree
+  CUresult res = cuMemFree(reinterpret_cast(ptr));
+  if (res != CUDA_SUCCESS) {
+    LOG(ERROR) << "cuMemFree failed to free " << ptr;
+  }
+#endif  // GOOGLE_CUDA
+}
+
+void GPUcudaMallocAllocator::AddAllocVisitor(Visitor visitor) {
+  return base_allocator_->AddAllocVisitor(visitor);
+}
+
+void GPUcudaMallocAllocator::AddFreeVisitor(Visitor visitor) {
+  return base_allocator_->AddFreeVisitor(visitor);
+}
+
+bool GPUcudaMallocAllocator::TracksAllocationSizes() { return false; }
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
new file mode 100644
index 0000000000..23552b809a
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
@@ -0,0 +1,52 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMMON_RUNTIME_GPU_GPU_CUDA_MALLOC_ALLOCATOR_H_
+#define TENSORFLOW_COMMON_RUNTIME_GPU_GPU_CUDA_MALLOC_ALLOCATOR_H_
+
+#include 
+
+#include "tensorflow/core/common_runtime/visitable_allocator.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/stream_executor.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+// An allocator that wraps a GPU allocator and adds debugging
+// functionality that verifies that users do not write outside their
+// allocated memory.
+class GPUcudaMallocAllocator : public VisitableAllocator {
+ public:
+  explicit GPUcudaMallocAllocator(VisitableAllocator* allocator, int device_id);
+  ~GPUcudaMallocAllocator() override;
+  string Name() override { return "gpu_debug"; }
+  void* AllocateRaw(size_t alignment, size_t num_bytes) override;
+  void DeallocateRaw(void* ptr) override;
+  void AddAllocVisitor(Visitor visitor) override;
+  void AddFreeVisitor(Visitor visitor) override;
+  bool TracksAllocationSizes() override;
+
+ private:
+  VisitableAllocator* base_allocator_ = nullptr;  // owned
+
+  perftools::gputools::StreamExecutor* stream_exec_;  // Not owned.
+
+  TF_DISALLOW_COPY_AND_ASSIGN(GPUcudaMallocAllocator);
+};
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMMON_RUNTIME_GPU_GPU_CUDAMALLOC_ALLOCATOR_H_
diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc
index 6b3c58ac9c..0675dbf3fc 100644
--- a/tensorflow/core/common_runtime/gpu/process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/process_state.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include 
 
 #include "tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
 #include "tensorflow/core/common_runtime/gpu/pool_allocator.h"
@@ -48,6 +49,27 @@ namespace gpu = ::perftools::gputools;
 
 namespace tensorflow {
 
+namespace {
+bool useCudaMallocAllocator() {
+  const char* debug_allocator_str = std::getenv("TF_GPU_ALLOCATOR");
+  if (debug_allocator_str != nullptr &&
+      strcmp(debug_allocator_str, "cuda_malloc") == 0)
+    return true;
+  else
+    return false;
+}
+
+bool useCudaMemoryGuardAllocator() {
+  const char* debug_allocator_str = std::getenv("TF_GPU_ALLOCATOR");
+  if (debug_allocator_str != nullptr &&
+      strcmp(debug_allocator_str, "memory_guard") == 0)
+    return true;
+  else
+    return false;
+}
+
+}  // namespace
+
 ProcessState* ProcessState::instance_ = nullptr;
 
 /*static*/ ProcessState* ProcessState::singleton() {
@@ -114,10 +136,14 @@ Allocator* ProcessState::GetGPUAllocator(const GPUOptions& options, int gpu_id,
 
     // If true, checks for memory overwrites by writing
     // distinctive patterns on both ends of allocated memory.
-    static const bool kGPUDebug = false;
-    if (kGPUDebug) {
+    if (useCudaMemoryGuardAllocator()) {
       gpu_allocator = new GPUDebugAllocator(gpu_allocator, gpu_id);
       gpu_allocator = new GPUNanResetAllocator(gpu_allocator, gpu_id);
+    } else if (useCudaMallocAllocator()) {
+      // If true, passes all allocation requests through to cudaMalloc
+      // useful for doing memory debugging with tools like cuda-memcheck
+      // **WARNING** probably will not work in a multi-gpu scenario
+      gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, gpu_id);
     }
     gpu_allocators_[gpu_id] = gpu_allocator;
 
-- 
GitLab


From 2bca5a18de91f60f9d34a353ca7491a65d8f3b13 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo 
Date: Tue, 29 Aug 2017 15:14:40 -0700
Subject: [PATCH 080/294] Add int32 gpu registrations for SetZero & SetOne
 functors.

PiperOrigin-RevId: 166911343
---
 tensorflow/core/kernels/constant_op_gpu.cu.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/core/kernels/constant_op_gpu.cu.cc b/tensorflow/core/kernels/constant_op_gpu.cu.cc
index 56bd918f3c..d1a1e34ec3 100644
--- a/tensorflow/core/kernels/constant_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/constant_op_gpu.cu.cc
@@ -95,6 +95,7 @@ DEFINE_SETZERO_GPU(float);
 DEFINE_SETZERO_GPU(double);
 DEFINE_SETZERO_GPU(complex64);
 DEFINE_SETZERO_GPU(complex128);
+DEFINE_SETZERO_GPU(int32);
 DEFINE_SETZERO_GPU(int64);
 #undef DEFINE_SETZERO_GPU
 
@@ -113,6 +114,7 @@ DEFINE_SETONE_GPU(float);
 DEFINE_SETONE_GPU(double);
 DEFINE_SETONE_GPU(complex64);
 DEFINE_SETONE_GPU(complex128);
+DEFINE_SETONE_GPU(int32);
 DEFINE_SETONE_GPU(int64);
 #undef DEFINE_SETONE_GPU
 
-- 
GitLab


From a23532cb3f5a26ac45464c817b96505059d54f04 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 15:21:53 -0700
Subject: [PATCH 081/294] Update ops-related pbtxt files.

PiperOrigin-RevId: 166912439
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 24 +++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 29 +++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 17343dc23d..8a481fadf8 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -8288,6 +8288,30 @@ op {
     }
   }
 }
+op {
+  name: "ExtractJpegShape"
+  input_arg {
+    name: "contents"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "image_shape"
+    type_attr: "output_type"
+  }
+  attr {
+    name: "output_type"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
 op {
   name: "FFT"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 823dbd5b6b..f5723807f6 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -7805,6 +7805,35 @@ op {
   }
   summary: "Extract `patches` from `images` and put them in the \"depth\" output dimension."
 }
+op {
+  name: "ExtractJpegShape"
+  input_arg {
+    name: "contents"
+    description: "0-D. The JPEG-encoded image."
+    type: DT_STRING
+  }
+  output_arg {
+    name: "image_shape"
+    description: "1-D. The image shape with format [height, width, channels]."
+    type_attr: "output_type"
+  }
+  attr {
+    name: "output_type"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    description: "(Optional) The output type of the operation (int32 or int64).\nDefaults to int32."
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  summary: "Extract the shape information of a JPEG-encoded image."
+  description: "This op only parses the image header, so it is much faster than DecodeJpeg."
+}
 op {
   name: "FFT"
   input_arg {
-- 
GitLab


From 0492dccf024be59f4f38f7650deb9570a2f0c2db Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 15:28:35 -0700
Subject: [PATCH 082/294] Go: Update generated wrapper functions for TensorFlow
 ops.

PiperOrigin-RevId: 166913409
---
 tensorflow/go/op/wrappers.go | 205 +++++++++++++++++++++--------------
 1 file changed, 123 insertions(+), 82 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 25a164348f..1b86ed3929 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -4944,88 +4944,6 @@ func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV
 	return op.Output(0)
 }
 
-// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2.
-type PaddingFIFOQueueV2Attr func(optionalAttr)
-
-// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value.
-//
-// value: The shape of each component in a value. The length of this attr must
-// be either 0 or the same as the length of component_types.
-// Shapes of fixed rank but variable size are allowed by setting
-// any shape dimension to -1.  In this case, the inputs' shape may vary along
-// the given dimension, and DequeueMany will pad the given dimension with
-// zeros up to the maximum shape of all elements in the given batch.
-// If the length of this attr is 0, different queue elements may have
-// different ranks and shapes, but only one element may be dequeued at a time.
-// If not specified, defaults to <>
-//
-// REQUIRES: len(value) >= 0
-func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shapes"] = value
-	}
-}
-
-// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value.
-//
-// value: The upper bound on the number of elements in this queue.
-// Negative numbers mean no limit.
-// If not specified, defaults to -1
-func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// PaddingFIFOQueueV2Container sets the optional container attribute to value.
-//
-// value: If non-empty, this queue is placed in the given container.
-// Otherwise, a default container is used.
-// If not specified, defaults to ""
-func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value.
-//
-// value: If non-empty, this queue will be shared under the given name
-// across multiple sessions.
-// If not specified, defaults to ""
-func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// A queue that produces elements in first-in first-out order.
-//
-// Variable-size shapes are allowed by setting the corresponding shape dimensions
-// to 0 in the shape attr.  In this case DequeueMany will pad up to the maximum
-// size of any given element in the minibatch.  See below for details.
-//
-// Arguments:
-//	component_types: The type of each component in a value.
-//
-// Returns The handle to the queue.
-func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"component_types": component_types}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "PaddingFIFOQueueV2",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // FIFOQueueV2Attr is an optional argument to FIFOQueueV2.
 type FIFOQueueV2Attr func(optionalAttr)
 
@@ -6812,6 +6730,129 @@ func AdjustContrastv2(scope *Scope, images tf.Output, contrast_factor tf.Output)
 	return op.Output(0)
 }
 
+// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2.
+type PaddingFIFOQueueV2Attr func(optionalAttr)
+
+// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value.
+//
+// value: The shape of each component in a value. The length of this attr must
+// be either 0 or the same as the length of component_types.
+// Shapes of fixed rank but variable size are allowed by setting
+// any shape dimension to -1.  In this case, the inputs' shape may vary along
+// the given dimension, and DequeueMany will pad the given dimension with
+// zeros up to the maximum shape of all elements in the given batch.
+// If the length of this attr is 0, different queue elements may have
+// different ranks and shapes, but only one element may be dequeued at a time.
+// If not specified, defaults to <>
+//
+// REQUIRES: len(value) >= 0
+func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shapes"] = value
+	}
+}
+
+// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value.
+//
+// value: The upper bound on the number of elements in this queue.
+// Negative numbers mean no limit.
+// If not specified, defaults to -1
+func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// PaddingFIFOQueueV2Container sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container.
+// Otherwise, a default container is used.
+// If not specified, defaults to ""
+func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value.
+//
+// value: If non-empty, this queue will be shared under the given name
+// across multiple sessions.
+// If not specified, defaults to ""
+func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// A queue that produces elements in first-in first-out order.
+//
+// Variable-size shapes are allowed by setting the corresponding shape dimensions
+// to 0 in the shape attr.  In this case DequeueMany will pad up to the maximum
+// size of any given element in the minibatch.  See below for details.
+//
+// Arguments:
+//	component_types: The type of each component in a value.
+//
+// Returns The handle to the queue.
+func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"component_types": component_types}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "PaddingFIFOQueueV2",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ExtractJpegShapeAttr is an optional argument to ExtractJpegShape.
+type ExtractJpegShapeAttr func(optionalAttr)
+
+// ExtractJpegShapeOutputType sets the optional output_type attribute to value.
+//
+// value: (Optional) The output type of the operation (int32 or int64).
+// Defaults to int32.
+// If not specified, defaults to DT_INT32
+func ExtractJpegShapeOutputType(value tf.DataType) ExtractJpegShapeAttr {
+	return func(m optionalAttr) {
+		m["output_type"] = value
+	}
+}
+
+// Extract the shape information of a JPEG-encoded image.
+//
+// This op only parses the image header, so it is much faster than DecodeJpeg.
+//
+// Arguments:
+//	contents: 0-D. The JPEG-encoded image.
+//
+// Returns 1-D. The image shape with format [height, width, channels].
+func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegShapeAttr) (image_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ExtractJpegShape",
+		Input: []tf.Input{
+			contents,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // DecodeJpegAttr is an optional argument to DecodeJpeg.
 type DecodeJpegAttr func(optionalAttr)
 
-- 
GitLab


From 5a76b8dce388a0a03849979a7eb655d1780cfd88 Mon Sep 17 00:00:00 2001
From: Ali Yahya 
Date: Tue, 29 Aug 2017 16:15:05 -0700
Subject: [PATCH 083/294] Modified variable scopes to work with Eager mode.

PiperOrigin-RevId: 166920017
---
 tensorflow/python/framework/test_util.py      |   4 +-
 tensorflow/python/kernel_tests/BUILD          |   2 +
 .../resource_variable_ops_test.py             |   4 +-
 .../kernel_tests/variable_scope_test.py       | 584 ++++++++++--------
 .../python/ops/resource_variable_ops.py       |   5 +-
 tensorflow/python/ops/variable_scope.py       |  56 +-
 6 files changed, 359 insertions(+), 296 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index be51a38daf..c65816a543 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -277,7 +277,7 @@ def enable_c_api(fn):
 
 def run_in_graph_and_eager_modes(__unused__=None, graph=None, config=None,
                                  use_gpu=False, force_gpu=False,
-                                 reset_test=False):
+                                 reset_test=True):
   """Runs the test in both graph and eager modes.
 
   Args:
@@ -305,6 +305,8 @@ def run_in_graph_and_eager_modes(__unused__=None, graph=None, config=None,
           f(self)
 
       if reset_test:
+        # This decorator runs the wrapped test twice.
+        # Reset the test environment between runs.
         self.tearDown()
         self.setUp()
 
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 2db686333d..bdd9a6dc22 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -932,12 +932,14 @@ tf_py_test(
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:init_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:variables",
+        "//tensorflow/python/eager:context",
     ],
     tags = ["no_windows"],
 )
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index 98fc568515..c31732d807 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -53,7 +53,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
                                                    0,
                                                    dtype=dtypes.int32)).run()
 
-  def testReadVariableDtypeMismatch(self):
+  def testReadVariableDtypeMismatchEager(self):
     with context.eager_mode():
       handle = resource_variable_ops.var_handle_op(
           dtype=dtypes.int32, shape=[1], name="foo")
@@ -62,7 +62,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
                                    "Expected float got int32."):
         _ = resource_variable_ops.read_variable_op(handle, dtype=dtypes.float32)
 
-  def testAssignVariableDtypeMismatch(self):
+  def testAssignVariableDtypeMismatchEager(self):
     with context.eager_mode():
       handle = resource_variable_ops.var_handle_op(
           dtype=dtypes.int32, shape=[1], name="foo")
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 67932d0823..cdac12f05a 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -20,10 +20,12 @@ from __future__ import print_function
 
 import numpy
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import init_ops
@@ -37,32 +39,38 @@ from tensorflow.python.platform import test
 
 class VariableScopeTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testGetVar(self):
     vs = variable_scope._get_default_variable_store()
     v = vs.get_variable("v", [1])
     v1 = vs.get_variable("v", [1])
     self.assertEqual(v, v1)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testResource(self):
     vs = variable_scope._get_default_variable_store()
     v1 = vs.get_variable("v", [1], use_resource=True)
     self.assertTrue(isinstance(v1, resource_variable_ops.ResourceVariable))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testNameExists(self):
     vs = variable_scope._get_default_variable_store()
     # No check by default, so we can both create and get existing names.
     v = vs.get_variable("v", [1])
     v1 = vs.get_variable("v", [1])
     self.assertEqual(v, v1)
-    # When reuse is False, we fail when variables are already there.
-    vs.get_variable("w", [1], reuse=False)  # That's ok.
-    with self.assertRaises(ValueError):
-      vs.get_variable("v", [1], reuse=False)  # That fails.
-    # When reuse is True, we fail when variables are new.
-    vs.get_variable("v", [1], reuse=True)  # That's ok.
-    with self.assertRaises(ValueError):
-      vs.get_variable("u", [1], reuse=True)  # That fails.
 
+    if context.in_graph_mode():
+      # When reuse is False, we fail when variables are already there.
+      vs.get_variable("w", [1], reuse=False)  # That's ok.
+      with self.assertRaises(ValueError):
+        vs.get_variable("v", [1], reuse=False)  # That fails.
+      # When reuse is True, we fail when variables are new.
+      vs.get_variable("v", [1], reuse=True)  # That's ok.
+      with self.assertRaises(ValueError):
+        vs.get_variable("u", [1], reuse=True)  # That fails.
+
+  @test_util.run_in_graph_and_eager_modes()
   def testNamelessStore(self):
     vs = variable_scope._get_default_variable_store()
     vs.get_variable("v1", [2])
@@ -71,22 +79,23 @@ class VariableScopeTest(test.TestCase):
     self.assertEqual(
         set(expected_names), set([v.name for v in vs._vars.values()]))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testVarScopeInitializer(self):
-    with self.test_session() as sess:
-      init = init_ops.constant_initializer(0.3)
-      with variable_scope.variable_scope("tower") as tower:
-        with variable_scope.variable_scope("foo", initializer=init):
-          v = variable_scope.get_variable("v", [])
-          sess.run(variables_lib.initialize_variables([v]))
-          self.assertAllClose(v.eval(), 0.3)
-        with variable_scope.variable_scope(tower, initializer=init):
-          w = variable_scope.get_variable("w", [])
-          sess.run(variables_lib.initialize_variables([w]))
-          self.assertAllClose(w.eval(), 0.3)
+    init = init_ops.constant_initializer(0.3)
+    with variable_scope.variable_scope("tower0") as tower:
+      with variable_scope.variable_scope("foo", initializer=init):
+        v = variable_scope.get_variable("v", [])
+        self.evaluate(variables_lib.variables_initializer([v]))
+        self.assertAllClose(self.evaluate(v.value()), 0.3)
+      with variable_scope.variable_scope(tower, initializer=init):
+        w = variable_scope.get_variable("w", [])
+        self.evaluate(variables_lib.variables_initializer([w]))
+        self.assertAllClose(self.evaluate(w.value()), 0.3)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testVarScopeConstraint(self):
     constraint = lambda x: 0. * x
-    with variable_scope.variable_scope("tower") as tower:
+    with variable_scope.variable_scope("tower1") as tower:
       with variable_scope.variable_scope("foo", constraint=constraint):
         v = variable_scope.get_variable("v", [])
         self.assertEqual(v.constraint, constraint)
@@ -94,51 +103,56 @@ class VariableScopeTest(test.TestCase):
         w = variable_scope.get_variable("w", [])
         self.assertEqual(w.constraint, constraint)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testVarScopeDType(self):
-    with self.test_session():
-      with variable_scope.variable_scope("tower") as tower:
-        with variable_scope.variable_scope("foo", dtype=dtypes.float16):
-          v = variable_scope.get_variable("v", [])
-          self.assertEqual(v.dtype.base_dtype, dtypes.float16)
-        with variable_scope.variable_scope(tower, dtype=dtypes.float16):
-          w = variable_scope.get_variable("w", [])
-          self.assertEqual(w.dtype.base_dtype, dtypes.float16)
+    with variable_scope.variable_scope("tower2") as tower:
+      with variable_scope.variable_scope("foo", dtype=dtypes.float16):
+        v = variable_scope.get_variable("v", [])
+        self.assertEqual(v.dtype.base_dtype, dtypes.float16)
+      with variable_scope.variable_scope(tower, dtype=dtypes.float16):
+        w = variable_scope.get_variable("w", [])
+        self.assertEqual(w.dtype.base_dtype, dtypes.float16)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testInitFromNonTensorValue(self):
-    with self.test_session() as sess:
-      v = variable_scope.get_variable("v", initializer=4, dtype=dtypes.int32)
-      sess.run(variables_lib.initialize_variables([v]))
-      self.assertAllClose(v.eval(), 4)
+    v = variable_scope.get_variable("v4", initializer=4, dtype=dtypes.int32)
+    self.evaluate(variables_lib.variables_initializer([v]))
+    self.assertAllClose(self.evaluate(v.value()), 4)
 
-      w = variable_scope.get_variable(
-          "w", initializer=numpy.array([1, 2, 3]), dtype=dtypes.int64)
-      sess.run(variables_lib.initialize_variables([w]))
-      self.assertAllClose(w.eval(), [1, 2, 3])
+    w = variable_scope.get_variable(
+        "w4", initializer=numpy.array([1, 2, 3]), dtype=dtypes.int64)
+    self.evaluate(variables_lib.variables_initializer([w]))
+    self.assertAllClose(self.evaluate(w.value()), [1, 2, 3])
 
+    if context.in_graph_mode():
       with self.assertRaises(TypeError):
-        variable_scope.get_variable("x", initializer={})
+        variable_scope.get_variable("x4", initializer={})
+    else:
+      with self.assertRaises(errors.InvalidArgumentError):
+        variable_scope.get_variable("x4", initializer={})
 
+  @test_util.run_in_graph_and_eager_modes()
   def testInitFromNonInitializer(self):
-    with self.test_session():
-      # Test various dtypes with zeros initializer as following:
-      types = [
-          dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.uint16, dtypes.int32,
-          dtypes.int64, dtypes.bool
-      ]
-
-      # Use different variable_name to distinguish various dtypes
-      for (i, dtype) in enumerate(types):
-        x = variable_scope.get_variable(
-            name="x%d" % i, shape=(3, 4), dtype=dtype)
-        y = variable_scope.get_variable(
-            name="y%d" % i,
-            shape=(3, 4),
-            dtype=dtype,
-            initializer=init_ops.zeros_initializer(dtype=dtype))
-
-        variables_lib.global_variables_initializer().run()
-        self.assertAllEqual(x.eval(), y.eval())
-
+    # Test various dtypes with zeros initializer as following:
+    types = [
+        dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.uint16, dtypes.int32,
+        dtypes.int64, dtypes.bool
+    ]
+
+    # Use different variable_name to distinguish various dtypes
+    for (i, dtype) in enumerate(types):
+      x = variable_scope.get_variable(
+          name="xx%d" % i, shape=(3, 4), dtype=dtype)
+      y = variable_scope.get_variable(
+          name="yy%d" % i,
+          shape=(3, 4),
+          dtype=dtype,
+          initializer=init_ops.zeros_initializer(dtype=dtype))
+
+      self.evaluate(variables_lib.global_variables_initializer())
+      self.assertAllEqual(self.evaluate(x.value()), self.evaluate(y.value()))
+
+  # TODO(alive): support variable partitioning/caching in eager mode.
   def testVarScopeCachingDevice(self):
     with self.test_session():
       caching_device = "/job:moo"
@@ -172,74 +186,74 @@ class VariableScopeTest(test.TestCase):
         v_tower = variable_scope.get_variable("v", [])
         self.assertFalse(v_tower.value().device.startswith(caching_device))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testVarScopeRegularizer(self):
-    with self.test_session() as sess:
-      init = init_ops.constant_initializer(0.3)
-
-      def regularizer1(v):
-        return math_ops.reduce_mean(v) + 0.1
-
-      def regularizer2(v):
-        return math_ops.reduce_mean(v) + 0.2
-
-      with variable_scope.variable_scope(
-          "tower", regularizer=regularizer1) as tower:
-        with variable_scope.variable_scope("foo", initializer=init):
-          v = variable_scope.get_variable("v", [])
-          sess.run(variables_lib.initialize_variables([v]))
-          losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-          self.assertEqual(1, len(losses))
-          self.assertAllClose(losses[0].eval(), 0.4)
-        with variable_scope.variable_scope(tower, initializer=init) as vs:
-          u = variable_scope.get_variable("u", [])
-          vs.set_regularizer(regularizer2)
-          w = variable_scope.get_variable("w", [])
-          # Next 3 variable not regularized to test disabling regularization.
-          x = variable_scope.get_variable(
-              "x", [], regularizer=variable_scope.no_regularizer)
-          with variable_scope.variable_scope(
-              "baz", regularizer=variable_scope.no_regularizer):
-            y = variable_scope.get_variable("y", [])
-          vs.set_regularizer(variable_scope.no_regularizer)
-          z = variable_scope.get_variable("z", [])
-          # Check results.
-          losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-          self.assertEqual(3, len(losses))
-          sess.run(variables_lib.initialize_variables([u, w, x, y, z]))
-          self.assertAllClose(losses[0].eval(), 0.4)
-          self.assertAllClose(losses[1].eval(), 0.4)
-          self.assertAllClose(losses[2].eval(), 0.5)
-        with variable_scope.variable_scope("foo", reuse=True):
-          v = variable_scope.get_variable("v",
-                                          [])  # "v" is alredy there, reused
-          losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
-          self.assertEqual(3, len(losses))  # No new loss added.
+    init = init_ops.constant_initializer(0.3)
 
-  def testInitializeFromValue(self):
-    with self.test_session() as sess:
-      init = constant_op.constant(0.1)
-      w = variable_scope.get_variable("v", initializer=init)
-      sess.run(variables_lib.initialize_variables([w]))
-      self.assertAllClose(w.eval(), 0.1)
+    def regularizer1(v):
+      return math_ops.reduce_mean(v) + 0.1
 
-      with self.assertRaisesRegexp(ValueError, "shape"):
-        # We disallow explicit shape specification when initializer is constant.
-        variable_scope.get_variable("u", [1], initializer=init)
+    def regularizer2(v):
+      return math_ops.reduce_mean(v) + 0.2
 
+    with variable_scope.variable_scope(
+        "tower3", regularizer=regularizer1) as tower:
       with variable_scope.variable_scope("foo", initializer=init):
-        # Constant initializer can be passed through scopes if needed.
-        v = variable_scope.get_variable("v")
-        sess.run(variables_lib.initialize_variables([v]))
-        self.assertAllClose(v.eval(), 0.1)
-
-      # Check that non-float32 initializer creates a non-float32 variable.
-      init = constant_op.constant(1, dtype=dtypes.int32)
-      t = variable_scope.get_variable("t", initializer=init)
-      self.assertEqual(t.dtype.base_dtype, dtypes.int32)
-
-      # Raise error if `initializer` dtype and `dtype` are not identical.
-      with self.assertRaisesRegexp(ValueError, "don't match"):
-        variable_scope.get_variable("s", initializer=init, dtype=dtypes.float64)
+        v = variable_scope.get_variable("v", [])
+        self.evaluate(variables_lib.variables_initializer([v]))
+        losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+        self.assertEqual(1, len(losses))
+        self.assertAllClose(self.evaluate(losses[0]), 0.4)
+      with variable_scope.variable_scope(tower, initializer=init) as vs:
+        u = variable_scope.get_variable("u", [])
+        vs.set_regularizer(regularizer2)
+        w = variable_scope.get_variable("w", [])
+        # Next 3 variable not regularized to test disabling regularization.
+        x = variable_scope.get_variable(
+            "x", [], regularizer=variable_scope.no_regularizer)
+        with variable_scope.variable_scope(
+            "baz", regularizer=variable_scope.no_regularizer):
+          y = variable_scope.get_variable("y", [])
+        vs.set_regularizer(variable_scope.no_regularizer)
+        z = variable_scope.get_variable("z", [])
+        # Check results.
+        losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+        self.assertEqual(3, len(losses))
+        self.evaluate(variables_lib.variables_initializer([u, w, x, y, z]))
+        self.assertAllClose(self.evaluate(losses[0]), 0.4)
+        self.assertAllClose(self.evaluate(losses[1]), 0.4)
+        self.assertAllClose(self.evaluate(losses[2]), 0.5)
+      with variable_scope.variable_scope("foo", reuse=True):
+        v = variable_scope.get_variable("v",
+                                        [])  # "v" is alredy there, reused
+        losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
+        self.assertEqual(3, len(losses))  # No new loss added.
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testInitializeFromValue(self):
+    init = constant_op.constant(0.1)
+    w = variable_scope.get_variable("v", initializer=init)
+    self.evaluate(variables_lib.variables_initializer([w]))
+    self.assertAllClose(self.evaluate(w.value()), 0.1)
+
+    with self.assertRaisesRegexp(ValueError, "shape"):
+      # We disallow explicit shape specification when initializer is constant.
+      variable_scope.get_variable("u", [1], initializer=init)
+
+    with variable_scope.variable_scope("foo", initializer=init):
+      # Constant initializer can be passed through scopes if needed.
+      v = variable_scope.get_variable("v")
+      self.evaluate(variables_lib.variables_initializer([v]))
+      self.assertAllClose(self.evaluate(v.value()), 0.1)
+
+    # Check that non-float32 initializer creates a non-float32 variable.
+    init = constant_op.constant(1, dtype=dtypes.int32)
+    t = variable_scope.get_variable("t", initializer=init)
+    self.assertEqual(t.dtype.base_dtype, dtypes.int32)
+
+    # Raise error if `initializer` dtype and `dtype` are not identical.
+    with self.assertRaisesRegexp(ValueError, "don't match"):
+      variable_scope.get_variable("s", initializer=init, dtype=dtypes.float64)
 
   def testControlDeps(self):
     with self.test_session() as sess:
@@ -250,16 +264,16 @@ class VariableScopeTest(test.TestCase):
             "v1", [1], initializer=init_ops.constant_initializer(1))
         add = v1 + v0
       # v0 should be uninitialized.
-      with self.assertRaisesRegexp(errors_impl.OpError, "uninitialized"):
+      with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
         sess.run(v0)
       # We should be able to initialize and run v1 without initializing
       # v0, even if the variable was created with a control dep on v0.
       sess.run(v1.initializer)
       self.assertEqual(1, sess.run(v1))
       # v0 should still be uninitialized.
-      with self.assertRaisesRegexp(errors_impl.OpError, "uninitialized"):
+      with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
         sess.run(v0)
-      with self.assertRaisesRegexp(errors_impl.OpError, "uninitialized"):
+      with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
         sess.run(add)
       # If we initialize v0 we should be able to run 'add'.
       sess.run(v0.initializer)
@@ -295,82 +309,85 @@ class VariableScopeTest(test.TestCase):
       sess.run(v2.initializer)
       self.assertEqual([2], sess.run(v2))
       # v0 should still be uninitialized.
-      with self.assertRaisesRegexp(errors_impl.OpError, "uninitialized"):
+      with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
         sess.run(v0)
       # We should not be able to run 'add' yet.
-      with self.assertRaisesRegexp(errors_impl.OpError, "uninitialized"):
+      with self.assertRaisesRegexp(errors.OpError, "uninitialized"):
         sess.run(add)
       # If we initialize v0 we should be able to run 'add'.
       sess.run(v0.initializer)
       sess.run(add)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testGetVariableScope(self):
     # Test the get_variable_scope() function and setting properties of result.
-    with self.test_session() as sess:
-      init = init_ops.constant_initializer(0.3)
-      with variable_scope.variable_scope("foo"):
-        new_init1 = variable_scope.get_variable_scope().initializer
-        self.assertEqual(new_init1, None)
-        # Check that we can set initializer like this.
-        variable_scope.get_variable_scope().set_initializer(init)
-        v = variable_scope.get_variable("v", [])
-        sess.run(variables_lib.initialize_variables([v]))
-        self.assertAllClose(v.eval(), 0.3)
+    init = init_ops.constant_initializer(0.3)
+    with variable_scope.variable_scope("bar"):
+      new_init1 = variable_scope.get_variable_scope().initializer
+      self.assertEqual(new_init1, None)
+      # Check that we can set initializer like this.
+      variable_scope.get_variable_scope().set_initializer(init)
+      v = variable_scope.get_variable("v", [])
+      self.evaluate(variables_lib.variables_initializer([v]))
+      self.assertAllClose(self.evaluate(v.value()), 0.3)
+      if context.in_graph_mode():
         # Check that we can set reuse.
         variable_scope.get_variable_scope().reuse_variables()
         with self.assertRaises(ValueError):  # Fail, w does not exist yet.
           variable_scope.get_variable("w", [1])
-      # Check that the set initializer goes away.
-      new_init = variable_scope.get_variable_scope().initializer
-      self.assertEqual(new_init, None)
+    # Check that the set initializer goes away.
+    new_init = variable_scope.get_variable_scope().initializer
+    self.assertEqual(new_init, None)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testVarScope(self):
-    with self.test_session():
-      with variable_scope.variable_scope("tower") as tower:
-        self.assertEqual(tower.name, "tower")
+    with variable_scope.variable_scope("tower4") as tower:
+      self.assertEqual(tower.name, "tower4")
+      with ops.name_scope("scope") as sc:
+        self.assertEqual(sc, "tower4/scope/")
+
+    with variable_scope.variable_scope("tower5"):
+      with variable_scope.variable_scope("bar") as bar:
+        self.assertEqual(bar.name, "tower5/bar")
         with ops.name_scope("scope") as sc:
-          self.assertEqual(sc, "tower/scope/")
+          self.assertEqual(sc, "tower5/bar/scope/")
 
-      with variable_scope.variable_scope("foo"):
-        with variable_scope.variable_scope("bar") as bar:
-          self.assertEqual(bar.name, "foo/bar")
-          with ops.name_scope("scope") as sc:
-            self.assertEqual(sc, "foo/bar/scope/")
-
-      with variable_scope.variable_scope("foo"):
-        with variable_scope.variable_scope(tower, reuse=True) as tower_shared:
-          self.assertEqual(tower_shared.name, "tower")
-          with ops.name_scope("scope") as sc:
-            self.assertEqual(sc, "foo_1/tower/scope/")
+    with variable_scope.variable_scope("tower6"):
+      with variable_scope.variable_scope(tower, reuse=True) as tower_shared:
+        self.assertEqual(tower_shared.name, "tower4")
+        with ops.name_scope("scope") as sc:
+          self.assertEqual(sc, "tower6/tower4/scope/")
 
+  @test_util.run_in_graph_and_eager_modes()
   def testVarScopeNameScope(self):
-    with self.test_session():
-      with ops.name_scope("scope1"):
-        with variable_scope.variable_scope("tower") as tower:
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope1/tower/scope2/")
+    with ops.name_scope("testVarScopeNameScope1"):
+      with variable_scope.variable_scope("tower") as tower:
+        with ops.name_scope("scope2") as sc2:
+          self.assertEqual(sc2, "testVarScopeNameScope1/tower/scope2/")
+      if context.in_graph_mode():
         with variable_scope.variable_scope(
             tower):  # Re-entering acts like another "tower".
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope1/tower_1/scope2/")
+            self.assertEqual(sc2, "testVarScopeNameScope1/tower_1/scope2/")
         with variable_scope.variable_scope(
             "tower"):  # Re-entering by string acts the same.
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope1/tower_2/scope2/")
+            self.assertEqual(sc2, "testVarScopeNameScope1/tower_2/scope2/")
 
-      with ops.name_scope("scope3"):
-        with variable_scope.variable_scope("tower"):
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope3/tower/scope2/")
+    with ops.name_scope("testVarScopeNameScope2"):
+      with variable_scope.variable_scope("tower"):
+        with ops.name_scope("scope2") as sc2:
+          self.assertEqual(sc2, "testVarScopeNameScope2/tower/scope2/")
+      if context.in_graph_mode():
         with variable_scope.variable_scope(tower):
           with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope3/tower_1/scope2/")
+            self.assertEqual(sc2, "testVarScopeNameScope2/tower_1/scope2/")
 
-      root_var_scope = variable_scope.get_variable_scope()
-      with ops.name_scope("scope4"):
-        with variable_scope.variable_scope(root_var_scope):
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope4/scope2/")
+    root_var_scope = variable_scope.get_variable_scope()
+    with ops.name_scope("testVarScopeNameScope3"):
+      with variable_scope.variable_scope(root_var_scope):
+        with ops.name_scope("scope2") as sc2:
+          self.assertEqual(sc2, "testVarScopeNameScope3/scope2/")
 
   def testVarScopeOriginalNameScope(self):
     with self.test_session():
@@ -422,51 +439,46 @@ class VariableScopeTest(test.TestCase):
       with variable_scope.variable_scope(vs, reuse=False) as jump_no_reuse:
         self.assertFalse(jump_no_reuse.reuse)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testVarScopeGetOrCreateReuse(self):
-    x = array_ops.placeholder(dtypes.float32)
-
-    with variable_scope.variable_scope("bar",
-                                       reuse=variable_scope.AUTO_REUSE):
-      v_assign = state_ops.assign(variable_scope.get_variable("var", []), x)
-
-    with variable_scope.variable_scope("bar",
-                                       reuse=variable_scope.AUTO_REUSE):
-      v = variable_scope.get_variable("var", [])
-
-    with self.test_session() as sess:
-      def test_value(value):
-        sess.run(v_assign, feed_dict={x: value})
-        self.assertEqual(value, v.eval())
-
-      test_value(42)  # Variable is created.
-      test_value(13)  # Variable is reused hereafter.
-      test_value(17)
+    def test_value(value):
+      x = constant_op.constant(value)
+      with variable_scope.variable_scope("testVarScopeGetOrCreateReuse_bar",
+                                         reuse=variable_scope.AUTO_REUSE):
+        _ = state_ops.assign(variable_scope.get_variable("var", []), x)
+      with variable_scope.variable_scope("testVarScopeGetOrCreateReuse_bar",
+                                         reuse=variable_scope.AUTO_REUSE):
+        _ = variable_scope.get_variable("var", [])
+      self.assertEqual(value, self.evaluate(x))
+    test_value(42.)  # Variable is created.
+    test_value(13.)  # Variable is reused hereafter.
+    test_value(17.)
 
   def testVarOpScope(self):
     with self.test_session():
-      with ops.name_scope("scope1"):
+      with ops.name_scope("testVarOpScope1"):
         with variable_scope.variable_scope("tower", "default", []):
           self.assertEqual(
               variable_scope.get_variable("w", []).name, "tower/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope1/tower/scope2/")
+          with ops.name_scope("testVarOpScope2") as sc2:
+            self.assertEqual(sc2, "testVarOpScope1/tower/testVarOpScope2/")
         with variable_scope.variable_scope("tower", "default", []):
           with self.assertRaises(ValueError):
             variable_scope.get_variable("w", [])
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope1/tower_1/scope2/")
+          with ops.name_scope("testVarOpScope2") as sc2:
+            self.assertEqual(sc2, "testVarOpScope1/tower_1/testVarOpScope2/")
 
-      with ops.name_scope("scope2"):
+      with ops.name_scope("testVarOpScope2"):
         with variable_scope.variable_scope(None, "default", []):
           self.assertEqual(
               variable_scope.get_variable("w", []).name, "default/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope2/default/scope2/")
+          with ops.name_scope("testVarOpScope2") as sc2:
+            self.assertEqual(sc2, "testVarOpScope2/default/testVarOpScope2/")
         with variable_scope.variable_scope(None, "default", []):
           self.assertEqual(
               variable_scope.get_variable("w", []).name, "default_1/w:0")
-          with ops.name_scope("scope2") as sc2:
-            self.assertEqual(sc2, "scope2/default_1/scope2/")
+          with ops.name_scope("testVarOpScope2") as sc2:
+            self.assertEqual(sc2, "testVarOpScope2/default_1/testVarOpScope2/")
 
   def testVarOpScopeUniqueNamesInterleavedSubstringScopes(self):
     with self.test_session():
@@ -714,27 +726,27 @@ class VariableScopeTest(test.TestCase):
           with ops.name_scope("scope2") as sc2:
             self.assertEqual(sc2, "outer_1/default/scope2/")
 
+  @test_util.run_in_graph_and_eager_modes()
   def testGetLocalVar(self):
-    with self.test_session():
-      # Check that local variable respects naming.
-      with variable_scope.variable_scope("outer") as outer:
-        with variable_scope.variable_scope(outer, "default", []):
-          local_var = variable_scope.get_local_variable(
-              "w", [], collections=["foo"])
-          self.assertEqual(local_var.name, "outer/w:0")
-
-      # Since variable is local, it should be in the local variable collection
-      # but not the trainable collection.
-      self.assertIn(local_var,
-                    ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES))
-      self.assertIn(local_var, ops.get_collection("foo"))
-      self.assertNotIn(local_var,
-                       ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
-
-      # Check that local variable respects `reuse`.
-      with variable_scope.variable_scope(outer, "default", reuse=True):
-        self.assertEqual(
-            variable_scope.get_local_variable("w", []).name, "outer/w:0")
+    # Check that local variable respects naming.
+    with variable_scope.variable_scope("outer") as outer:
+      with variable_scope.variable_scope(outer, "default", []):
+        local_var = variable_scope.get_local_variable(
+            "w", [], collections=["foo"])
+        self.assertEqual(local_var.name, "outer/w:0")
+
+    # Since variable is local, it should be in the local variable collection
+    # but not the trainable collection.
+    self.assertIn(local_var,
+                  ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES))
+    self.assertIn(local_var, ops.get_collection("foo"))
+    self.assertNotIn(local_var,
+                     ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
+
+    # Check that local variable respects `reuse`.
+    with variable_scope.variable_scope(outer, "default", reuse=True):
+      self.assertEqual(
+          variable_scope.get_local_variable("w", []).name, "outer/w:0")
 
   def testGetVarWithDevice(self):
     g = ops.Graph()
@@ -753,69 +765,93 @@ class VariableScopeTest(test.TestCase):
     self.assertEqual(varname_type[0], ("x", dtypes.float32))
     self.assertEqual(varname_type[1], ("y", dtypes.int64))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testGetCollection(self):
-    with self.test_session():
-      _ = variable_scope.get_variable("a", [])
-      _ = variable_scope.get_variable("b", [], trainable=False)
-      with variable_scope.variable_scope("foo_") as scope1:
-        _ = variable_scope.get_variable("a", [])
-        _ = variable_scope.get_variable("b", [], trainable=False)
-        self.assertEqual([
-            v.name
-            for v in scope1.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-        ], ["foo_/a:0"])
-        self.assertEqual([
-            v.name
-            for v in scope1.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-        ], ["foo_/a:0", "foo_/b:0"])
-      with variable_scope.variable_scope("foo") as scope2:
-        _ = variable_scope.get_variable("a", [])
-        _ = variable_scope.get_variable("b", [], trainable=False)
-        self.assertEqual([
-            v.name
-            for v in scope2.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-        ], ["foo/a:0"])
-        self.assertEqual([
-            v.name
-            for v in scope2.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-        ], ["foo/a:0", "foo/b:0"])
-      scope = variable_scope.get_variable_scope()
+    _ = variable_scope.get_variable("testGetCollection_a", [])
+    _ = variable_scope.get_variable("testGetCollection_b", [], trainable=False)
+    with variable_scope.variable_scope("testGetCollection_foo_") as scope1:
+      _ = variable_scope.get_variable("testGetCollection_a", [])
+      _ = variable_scope.get_variable("testGetCollection_b", [],
+                                      trainable=False)
       self.assertEqual([
-          v.name for v in scope.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-      ], ["a:0", "b:0", "foo_/a:0", "foo_/b:0", "foo/a:0", "foo/b:0"])
+          v.name
+          for v in scope1.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      ], ["testGetCollection_foo_/testGetCollection_a:0"])
       self.assertEqual([
           v.name
-          for v in scope.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
-      ], ["a:0", "foo_/a:0", "foo/a:0"])
-
+          for v in scope1.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+      ], [
+          "testGetCollection_foo_/testGetCollection_a:0",
+          "testGetCollection_foo_/testGetCollection_b:0"
+      ])
+    with variable_scope.variable_scope("testGetCollection_foo") as scope2:
+      _ = variable_scope.get_variable("testGetCollection_a", [])
+      _ = variable_scope.get_variable("testGetCollection_b", [],
+                                      trainable=False)
+      self.assertEqual([
+          v.name
+          for v in scope2.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+      ], ["testGetCollection_foo/testGetCollection_a:0"])
+      self.assertEqual([
+          v.name
+          for v in scope2.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+      ], [
+          "testGetCollection_foo/testGetCollection_a:0",
+          "testGetCollection_foo/testGetCollection_b:0"
+      ])
+    scope = variable_scope.get_variable_scope()
+    self.assertEqual([
+        v.name for v in scope.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    ], [
+        "testGetCollection_a:0", "testGetCollection_b:0",
+        "testGetCollection_foo_/testGetCollection_a:0",
+        "testGetCollection_foo_/testGetCollection_b:0",
+        "testGetCollection_foo/testGetCollection_a:0",
+        "testGetCollection_foo/testGetCollection_b:0"
+    ])
+    self.assertEqual([
+        v.name
+        for v in scope.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
+    ], [
+        "testGetCollection_a:0",
+        "testGetCollection_foo_/testGetCollection_a:0",
+        "testGetCollection_foo/testGetCollection_a:0"
+    ])
+
+  @test_util.run_in_graph_and_eager_modes()
   def testGetTrainableVariables(self):
-    with self.test_session():
-      _ = variable_scope.get_variable("a", [])
-      with variable_scope.variable_scope("foo") as scope:
-        _ = variable_scope.get_variable("b", [])
-        _ = variable_scope.get_variable("c", [], trainable=False)
-        self.assertEqual([v.name
-                          for v in scope.trainable_variables()], ["foo/b:0"])
-
+    _ = variable_scope.get_variable("testGetTrainableVariables_a", [])
+    with variable_scope.variable_scope(
+        "testGetTrainableVariables_foo") as scope:
+      _ = variable_scope.get_variable("testGetTrainableVariables_b", [])
+      _ = variable_scope.get_variable("testGetTrainableVariables_c", [],
+                                      trainable=False)
+      self.assertEqual([v.name
+                        for v in scope.trainable_variables()],
+                       ["testGetTrainableVariables_foo/"
+                        "testGetTrainableVariables_b:0"])
+
+  @test_util.run_in_graph_and_eager_modes()
   def testGetGlobalVariables(self):
-    with self.test_session():
-      _ = variable_scope.get_variable("a", [])
-      with variable_scope.variable_scope("foo") as scope:
-        _ = variable_scope.get_variable("b", [])
-        self.assertEqual([v.name
-                          for v in scope.global_variables()], ["foo/b:0"])
-
+    _ = variable_scope.get_variable("testGetGlobalVariables_a", [])
+    with variable_scope.variable_scope("testGetGlobalVariables_foo") as scope:
+      _ = variable_scope.get_variable("testGetGlobalVariables_b", [])
+      self.assertEqual([v.name
+                        for v in scope.global_variables()],
+                       ["testGetGlobalVariables_foo/"
+                        "testGetGlobalVariables_b:0"])
+
+  @test_util.run_in_graph_and_eager_modes()
   def testGetLocalVariables(self):
-    with self.test_session():
+    _ = variable_scope.get_variable(
+        "a", [], collections=[ops.GraphKeys.LOCAL_VARIABLES])
+    with variable_scope.variable_scope("foo") as scope:
       _ = variable_scope.get_variable(
-          "a", [], collections=[ops.GraphKeys.LOCAL_VARIABLES])
-      with variable_scope.variable_scope("foo") as scope:
-        _ = variable_scope.get_variable(
-            "b", [], collections=[ops.GraphKeys.LOCAL_VARIABLES])
-        _ = variable_scope.get_variable(
-            "c", [])
-        self.assertEqual([v.name
-                          for v in scope.local_variables()], ["foo/b:0"])
+          "b", [], collections=[ops.GraphKeys.LOCAL_VARIABLES])
+      _ = variable_scope.get_variable(
+          "c", [])
+      self.assertEqual([v.name
+                        for v in scope.local_variables()], ["foo/b:0"])
 
   def testGetVariableWithRefDtype(self):
     v = variable_scope.get_variable("v", shape=[3, 4], dtype=dtypes.float32)
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index e43cf4bc04..1d747f8400 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -255,8 +255,9 @@ class ResourceVariable(variables.Variable):
                                      context.get_default_context().device_name)
           else:
             initial_value = initial_value()
-            initial_value = ops.convert_to_tensor(
-                initial_value, name="initial_value", dtype=dtype)
+            with ops.name_scope("Initializer"):
+              initial_value = ops.convert_to_tensor(
+                  initial_value, name="initial_value", dtype=dtype)
             self._handle = gen_resource_variable_ops.var_handle_op(
                 shape=initial_value.get_shape(),
                 dtype=initial_value.dtype.base_dtype,
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index b7913890e4..9093c12968 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -259,7 +259,8 @@ class _VariableStore(object):
         applying it on a newly created variable will be added to the collection
         GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
       reuse: a Boolean, None, or tf.AUTO_REUSE. Controls reuse or creation
-        of variables.
+        of variables. In Eager mode, this argument is always forced to be
+        tf.AUTO_REUSE.
       trainable: If `True` also add the variable to the graph collection
         `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
       collections: List of graph collections keys to add the `Variable` to.
@@ -278,6 +279,7 @@ class _VariableStore(object):
       use_resource: If False, creates a regular Variable. If True, creates
         instead an experimental ResourceVariable which has well-defined
         semantics. Defaults to False (will later change to True).
+        In Eager mode, this argument is always forced to be true.
       custom_getter: Callable that takes as a first argument the true getter,
         and allows overwriting the internal get_variable method.
         The signature of `custom_getter` should match that of this method,
@@ -311,6 +313,10 @@ class _VariableStore(object):
       raise ValueError(
           "Passed a custom_getter which is not callable: %s" % custom_getter)
 
+    if context.in_eager_mode():
+      reuse = AUTO_REUSE
+      use_resource = True
+
     # If a *_ref type is passed in an error would be triggered further down the
     # stack. We prevent this using base_dtype to get a non-ref version of the
     # type, before doing anything else. When _ref types are removed in favor of
@@ -498,6 +504,9 @@ class _VariableStore(object):
         when violating reuse during variable creation, or if an existing
         sharded variable exists for the given name but with different sharding.
     """
+    if context.in_eager_mode():
+      raise NotImplementedError("Partitioned variables are not yet supported "
+                                "in Eager mode.")
 
     initializing_from_value = initializer is not None and isinstance(
         initializer, ops.Tensor)
@@ -792,14 +801,19 @@ class _VariableStore(object):
 
     # Run the regularizer if requested and save the resulting loss.
     if regularizer:
-      with ops.colocate_with(v.op):
+      with ops.colocate_with(v):
         with ops.name_scope(name + "/Regularizer/"):
           loss = regularizer(v)
         if loss is not None:
+          if context.in_graph_mode():
+            v_name = v.name
+            loss_name = loss.name
+          else:
+            v_name = "v_%s" % type(v)
+            loss_name = "loss_%s" % type(loss)
           logging.vlog(1, "Applied regularizer to %s and added the result %s "
-                       "to REGULARIZATION_LOSSES.", v.name, loss.name)
+                       "to REGULARIZATION_LOSSES.", v_name, loss_name)
           ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, loss)
-
     return v
 
   # Initialize variable when no initializer provided
@@ -853,7 +867,8 @@ class VariableScope(object):
     initializer: default initializer passed to get_variable.
     regularizer: default regularizer passed to get_variable.
     reuse: Boolean, None, or tf.AUTO_REUSE, setting the reuse in
-      get_variable.
+      get_variable. In Eager mode, this argument is always forced to be
+      tf.AUTO_REUSE.
     caching_device: string, callable, or None: the caching device passed to
       get_variable.
     partitioner: callable or `None`: the partitioner passed to `get_variable`.
@@ -862,7 +877,8 @@ class VariableScope(object):
     dtype: default type passed to get_variable (defaults to DT_FLOAT).
     use_resource: if False, create a normal Variable; if True create an
       experimental ResourceVariable with well-defined semantics. Defaults
-      to False (will later change to True).
+      to False (will later change to True). In Eager mode, this argument is
+      always forced to be True.
     constraint: An optional projection function to be applied to the variable
       after being updated by an `Optimizer` (e.g. used to implement norm
       constraints or value constraints for layer weights). The function must
@@ -903,6 +919,7 @@ class VariableScope(object):
       if self._partitioner is not None:
         raise NotImplementedError("Partitioned variables are not yet supported "
                                   "in Eager mode.")
+      self._reuse = AUTO_REUSE
       self._use_resource = True
 
   @property
@@ -963,6 +980,8 @@ class VariableScope(object):
 
   def set_use_resource(self, use_resource):
     """Sets whether to use ResourceVariables for this scope."""
+    if context.in_eager_mode() and not use_resource:
+      raise ValueError("In eager mode, use_resource cannot be set to false.")
     self._use_resource = use_resource
 
   def set_regularizer(self, regularizer):
@@ -1029,8 +1048,14 @@ class VariableScope(object):
       partitioner = self._partitioner
     if custom_getter is None:
       custom_getter = self._custom_getter
-    if reuse is None:
-      reuse = self._reuse
+    if context.in_graph_mode():
+      if reuse is None:
+        reuse = self._reuse
+      if use_resource is None:
+        use_resource = self._use_resource
+    else:
+      reuse = AUTO_REUSE
+      use_resource = True
 
     full_name = self.name + "/" + name if self.name else name
     # Variable names only depend on variable_scope (full_name here),
@@ -1050,12 +1075,6 @@ class VariableScope(object):
         constraint = self._constraint
       if dtype is None:
         dtype = self._dtype
-      if context.in_graph_mode():
-        if use_resource is None:
-          use_resource = self._use_resource
-      else:
-        use_resource = True
-
       return var_store.get_variable(
           full_name, shape=shape, dtype=dtype, initializer=initializer,
           regularizer=regularizer, reuse=reuse, trainable=trainable,
@@ -1232,7 +1251,8 @@ Args:
       must be known.
   use_resource: If False, creates a regular Variable. If true, creates an
     experimental ResourceVariable instead with well-defined semantics.
-    Defaults to False (will later change to True).
+    Defaults to False (will later change to True). In Eager mode, this argument
+    is always forced to be True.
   custom_getter: Callable that takes as a first argument the true getter, and
     allows overwriting the internal get_variable method.
     The signature of `custom_getter` should match that of this method,
@@ -1661,12 +1681,14 @@ def variable_scope(name_or_scope,
     reuse: `True`, None, or tf.AUTO_REUSE; if `True`, we go into reuse mode
       for this scope as well as all sub-scopes; if tf.AUTO_REUSE, we create
       variables if they do not exist, and return them otherwise; if None, we
-      inherit the parent scope's reuse flag.
+      inherit the parent scope's reuse flag. In Eager mode, this argument is
+      always forced to be tf.AUTO_REUSE.
     dtype: type of variables created in this scope (defaults to the type
       in the passed scope, or inherited from parent scope).
     use_resource: If False, all variables will be regular Variables. If True,
       experimental ResourceVariables with well-defined semantics will be used
-      instead. Defaults to False (will later change to True).
+      instead. Defaults to False (will later change to True). In Eager mode,
+      this argument is always forced to be True.
     constraint: An optional projection function to be applied to the variable
       after being updated by an `Optimizer` (e.g. used to implement norm
       constraints or value constraints for layer weights). The function must
-- 
GitLab


From cf4eaffb3ddcedb723355f01c5fec2cdc020a40b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 16:17:00 -0700
Subject: [PATCH 084/294] Add support for equal() and not_equal() to
 tf.contrib.util.constant_value().

PiperOrigin-RevId: 166920279
---
 tensorflow/python/framework/tensor_util.py    | 16 ++++++++++
 .../python/framework/tensor_util_test.py      | 30 +++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 686e6a9be4..eea3d28a7e 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -689,6 +689,22 @@ def _ConstantValue(tensor, partial):
       return np.full(fill_shape.as_list(), fill_value, dtype=fill_value.dtype)
     else:
       return None
+  elif tensor.op.type == "Equal":
+    value1 = constant_value(tensor.op.inputs[0])
+    if value1 is None:
+      return None
+    value2 = constant_value(tensor.op.inputs[1])
+    if value2 is None:
+      return None
+    return np.equal(value1, value2)
+  elif tensor.op.type == "NotEqual":
+    value1 = constant_value(tensor.op.inputs[0])
+    if value1 is None:
+      return None
+    value2 = constant_value(tensor.op.inputs[1])
+    if value2 is None:
+      return None
+    return np.not_equal(value1, value2)
   else:
     return None
 
diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py
index 3a6dce11f4..2760f98a6b 100644
--- a/tensorflow/python/framework/tensor_util_test.py
+++ b/tensorflow/python/framework/tensor_util_test.py
@@ -800,6 +800,36 @@ class ConstantValueTest(test.TestCase):
     self.assertAllClose(input_, c_val[0])
     self.assertIsNone(c_val[1])
 
+  def testEqual(self):
+    # Scalar inputs.
+    tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(1))
+    self.assertEqual(tensor_util.constant_value(tf_val), True)
+
+    tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(0))
+    self.assertEqual(tensor_util.constant_value(tf_val), False)
+
+    # Shaped inputs with broadcast semantics.
+    tf_val = math_ops.equal(constant_op.constant([[0, 1]]),
+                            constant_op.constant([[0], [1]]))
+    c_val = tensor_util.constant_value(tf_val)
+    self.assertAllEqual(c_val, [[True, False], [False, True]])
+
+  def testNotEqual(self):
+    # Scalar inputs.
+    tf_val = math_ops.not_equal(constant_op.constant(1),
+                                constant_op.constant(1))
+    self.assertEqual(tensor_util.constant_value(tf_val), False)
+
+    tf_val = math_ops.not_equal(constant_op.constant(1),
+                                constant_op.constant(0))
+    self.assertEqual(tensor_util.constant_value(tf_val), True)
+
+    # Shaped inputs with broadcast semantics.
+    tf_val = math_ops.not_equal(constant_op.constant([[0, 1]]),
+                                constant_op.constant([[0], [1]]))
+    c_val = tensor_util.constant_value(tf_val)
+    self.assertAllEqual(c_val, [[False, True], [True, False]])
+
 
 class ConstantValueAsShapeTest(test.TestCase):
 
-- 
GitLab


From f64fd417124899df84fab9a852b2c12b9082a368 Mon Sep 17 00:00:00 2001
From: Igor Ganichev 
Date: Tue, 29 Aug 2017 16:45:59 -0700
Subject: [PATCH 085/294] Use size_t for string size

This fixes a broken build

PiperOrigin-RevId: 166924279
---
 tensorflow/core/lib/strings/str_util.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/lib/strings/str_util.cc b/tensorflow/core/lib/strings/str_util.cc
index 1159304724..8509c9a041 100644
--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@@ -249,14 +249,14 @@ string Uppercase(StringPiece s) {
 }
 
 string ArgDefCase(StringPiece s) {
-  const int n = s.size();
+  const size_t n = s.size();
 
   // Compute the size of resulting string.
   // Number of extra underscores we will need to add.
-  int extra_us = 0;
+  size_t extra_us = 0;
   // Number of non-alpha chars in the beginning to skip.
-  int to_skip = 0;
-  for (int i = 0; i < n; ++i) {
+  size_t to_skip = 0;
+  for (size_t i = 0; i < n; ++i) {
     // If we are skipping and current letter is non-alpha, skip it as well
     if (i == to_skip && !isalpha(s[i])) {
       ++to_skip;
@@ -277,7 +277,7 @@ string ArgDefCase(StringPiece s) {
   string result(n + extra_us - to_skip, '_');
   // i - index into s
   // j - index into result
-  for (int i = to_skip, j = 0; i < n; ++i, ++j) {
+  for (size_t i = to_skip, j = 0; i < n; ++i, ++j) {
     DCHECK_LT(j, result.size());
     char c = s[i];
     // If c is not alphanumeric, we don't need to do anything
-- 
GitLab


From 1d1fcc0fece06426ebc4a4c90cc1a06efc234a89 Mon Sep 17 00:00:00 2001
From: Benoit Steiner 
Date: Tue, 29 Aug 2017 16:51:56 -0700
Subject: [PATCH 086/294] The GPU devices have been renamed devices:GPU. Update
 the code accordingly

PiperOrigin-RevId: 166925106
---
 tensorflow/core/common_runtime/step_stats_collector.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc
index d410a164ea..ee12624074 100644
--- a/tensorflow/core/common_runtime/step_stats_collector.cc
+++ b/tensorflow/core/common_runtime/step_stats_collector.cc
@@ -40,8 +40,8 @@ static int ExtractGpuWithStreamAll(string device_name) {
   scanner.OneLiteral("lla:maerts/");
   // Capture the digits if present
   scanner.RestartCapture().Many(strings::Scanner::DIGIT).StopCapture();
-  // Check that the digits are preceded by the 'gpu:' string
-  scanner.OneLiteral(":upg");
+  // Check that the digits are preceded by the 'device:GPU:' string
+  scanner.OneLiteral(":UPG:ecived");
   StringPiece capture;
   bool matched = scanner.GetResult(nullptr, &capture);
 
@@ -69,8 +69,8 @@ static int ExtractGpuWithoutStream(string device_name) {
   strings::Scanner scanner(device_name);
   // Capture the trailing digits if present
   scanner.RestartCapture().Many(strings::Scanner::DIGIT).StopCapture();
-  // Check that the digits are preceded by the 'gpu:' string
-  scanner.OneLiteral(":upg");
+  // Check that the digits are preceded by the 'device:GPU:' string
+  scanner.OneLiteral(":UPG:ecived");
   StringPiece capture;
   bool matched = scanner.GetResult(nullptr, &capture);
 
-- 
GitLab


From 5201a821ec501f9a030b80042e10903d3b4596bc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 29 Aug 2017 17:19:00 -0700
Subject: [PATCH 087/294] Fixes to quantile ops in TFT. 1. Use to_int64 in TFT
 to generate int64 output. Earlier change to output type in
 BucketizeWithInputBoundariesOp causes failure in ExpandDims during infra
 validation in TFX pipeline. 2. By default use a non-zero epsilon in boundary
 calculation, since exact boundary computation with epsilong=0 uses too much
 memory.

PiperOrigin-RevId: 166928699
---
 .../contrib/boosted_trees/kernels/quantile_ops.cc      | 10 +++++++---
 tensorflow/contrib/boosted_trees/ops/quantile_ops.cc   |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc
index bbcc308677..3ccc36dff8 100644
--- a/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc
+++ b/tensorflow/contrib/boosted_trees/kernels/quantile_ops.cc
@@ -901,7 +901,7 @@ class BucketizeWithInputBoundariesOp : public OpKernel {
     Tensor* output_tensor = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
                                                      &output_tensor));
-    auto output = output_tensor->template flat();
+    auto output = output_tensor->template flat();
 
     for (size_t i = 0; i < input.size(); i++) {
       output(i) = CalculateBucketIndex(input(i));
@@ -909,10 +909,14 @@ class BucketizeWithInputBoundariesOp : public OpKernel {
   }
 
  private:
-  int64 CalculateBucketIndex(const T value) {
+  int32 CalculateBucketIndex(const T value) {
     auto first_bigger_it =
         std::upper_bound(boundaries_.begin(), boundaries_.end(), value);
-    return first_bigger_it - boundaries_.begin();
+    int32 index = first_bigger_it - boundaries_.begin();
+    CHECK(index >= 0 && index <= boundaries_.size())
+        << "Invalid bucket index: " << index
+        << " boundaries_.size(): " << boundaries_.size();
+    return index;
   }
   std::vector boundaries_;
 };
diff --git a/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc b/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc
index ac8a5201a0..0336008e86 100644
--- a/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc
+++ b/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc
@@ -289,7 +289,7 @@ the sparse feature tensors.
 REGISTER_OP("BucketizeWithInputBoundaries")
     .Input("input: T")
     .Input("boundaries: float")
-    .Output("output: int64")
+    .Output("output: int32")
     .Attr("T: {int32, int64, float, double}")
     .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
-- 
GitLab


From 943d606a7475d61f6453c503b39a4316c84e8b20 Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Tue, 29 Aug 2017 17:19:58 -0700
Subject: [PATCH 088/294] Fixes stray isinstance(_, Tensor) check in math_ops.

PiperOrigin-RevId: 166928826
---
 tensorflow/python/eager/backprop_test.py | 10 ++++++++++
 tensorflow/python/ops/math_ops.py        |  6 ++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 0b9a68db67..8a13f9c068 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -25,6 +25,7 @@ from tensorflow.python.eager import tape
 from tensorflow.python.eager import tensor
 from tensorflow.python.eager import tensor_node
 from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
@@ -271,6 +272,15 @@ class BackpropTest(test.TestCase):
     self.assertEqual([dtypes.float32],
                      backprop.make_attr([pywrap_tensorflow.TF_ATTR_TYPE], [1]))
 
+  def testMulType(self):
+
+    def mul(x):
+      return math_ops._mul_dispatch(x, x)  # pylint: disable=protected-access
+
+    self.assertAllEqual(
+        backprop.gradients_function(mul)(constant_op.constant(3.0))[0].numpy(),
+        6.0)
+
   def testMakeAttrShape(self):
     for s in ([], None, [1, 2, 3], [None, None], [1, None, 3]):
       expected = tensor_shape.TensorShape(s).as_proto()
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index fd8a5daa33..83f854630e 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -144,6 +144,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from autograd import core as ag_core
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
@@ -1112,11 +1113,12 @@ floormod = gen_math_ops._floor_mod
 
 def _mul_dispatch(x, y, name=None):
   """Dispatches cwise mul for "Dense*Dense" and "Dense*Sparse"."""
-  is_tensor_y = isinstance(y, ops.Tensor)
+  is_tensor_y = isinstance(ag_core.getval(y), ops.Tensor)
   if is_tensor_y:
     return gen_math_ops._mul(x, y, name=name)
   else:
-    assert isinstance(y, sparse_tensor.SparseTensor)  # Case: Dense * Sparse.
+    assert isinstance(ag_core.getval(y),
+                      sparse_tensor.SparseTensor)  # Case: Dense * Sparse.
     new_vals = gen_sparse_ops.sparse_dense_cwise_mul(y.indices, y.values,
                                                      y.dense_shape, x, name)
     return sparse_tensor.SparseTensor(y.indices, new_vals, y.dense_shape)
-- 
GitLab


From b7816d0536dfd6c25499441d922aaea54ca8efed Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Tue, 29 Aug 2017 17:22:16 -0700
Subject: [PATCH 089/294] Supporting control flow ops in eager mode. (for
 eager/graph mode code portability)

PiperOrigin-RevId: 166929114
---
 .../kernel_tests/control_flow_ops_py_test.py  | 46 +++++++++++++++++++
 tensorflow/python/ops/control_flow_ops.py     | 36 +++++++++++++--
 2 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index a43fe71b9f..6e81e1fdbd 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -30,6 +30,7 @@ from six.moves import xrange  # pylint: disable=redefined-builtin
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import device_lib
 from tensorflow.python.client import session
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -2849,5 +2850,50 @@ class WhileOpBenchmark(test.Benchmark):
         name="unroll_same_device", iters=iters, wall_time=duration)
 
 
+class EagerTest(test.TestCase):
+
+  def testCond(self):
+    with context.eager_mode():
+      pred = math_ops.less(1, 2)
+      fn1 = lambda: constant_op.constant(10)
+      fn2 = lambda: constant_op.constant(20)
+      r = control_flow_ops.cond(pred, fn1, fn2)
+
+      self.assertAllEqual(r.numpy(), 10)
+
+  def testWhileLoop(self):
+    with context.eager_mode():
+      tensor = constant_op.constant([1, 2, 3, 4, 5])
+      self.assertAllEqual(isum(tensor).numpy(),
+                          [46, 47, 48, 49, 50])
+
+  def testWithDependencies(self):
+    with context.eager_mode():
+      t1 = constant_op.constant(1)
+      t2 = constant_op.constant(2)
+      t3 = control_flow_ops.with_dependencies(t1, t2)
+      self.assertAllEqual(t2.numpy(), t3.numpy())
+
+  def testTuple(self):
+    with context.eager_mode():
+      t1 = constant_op.constant(1)
+      t2 = constant_op.constant(2)
+      tup1, tup2 = control_flow_ops.tuple([t1, t2])
+      self.assertAllEqual(t1.numpy(), tup1.numpy())
+      self.assertAllEqual(t2.numpy(), tup2.numpy())
+
+  def testCase(self):
+    with context.eager_mode():
+      x = constant_op.constant(1)
+      y = constant_op.constant(2)
+      z = constant_op.constant(3)
+      f1 = lambda: constant_op.constant(17)
+      f2 = lambda: constant_op.constant(23)
+      f3 = lambda: constant_op.constant(-1)
+
+      r1 = control_flow_ops.case([(x < y, f1), (x > z, f2)],
+                                 default=f3, exclusive=True)
+      self.assertAllEqual(r1.numpy(), 17)
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 89de88a530..97b37ea027 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -1813,6 +1813,11 @@ def cond(pred, true_fn=None, false_fn=None, strict=False, name=None,
   if not callable(false_fn):
     raise TypeError("false_fn must be callable.")
 
+  if context.in_eager_mode():
+    if pred:
+      return true_fn()
+    return false_fn()
+
   with ops.name_scope(name, "cond", [pred]):
     # Add the Switch to the graph.
     if isinstance(pred, bool):
@@ -2671,7 +2676,7 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
   Note that `while_loop` calls `cond` and `body` *exactly once* (inside the
   call to `while_loop`, and not at all during `Session.run()`). `while_loop`
   stitches together the graph fragments created during the `cond` and `body`
-  calls with some additional graph nodes to create the graph flow that 
+  calls with some additional graph nodes to create the graph flow that
   repeats `body` until `cond` returns false.
 
   For correctness, `tf.while_loop()` strictly enforces shape invariants for
@@ -2779,12 +2784,17 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
     if parallel_iterations < 1:
       raise TypeError("parallel_iterations must be a positive integer.")
 
+    if context.in_eager_mode():
+      while cond(*loop_vars):
+        loop_vars = body(*loop_vars)
+      return loop_vars
+
     if shape_invariants is not None:
       nest.assert_same_structure(loop_vars, shape_invariants)
 
-    context = WhileContext(parallel_iterations, back_prop, swap_memory)  # pylint: disable=redefined-outer-name
-    ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-    result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
+    loop_context = WhileContext(parallel_iterations, back_prop, swap_memory)  # pylint: disable=redefined-outer-name
+    ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
+    result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
     return result
 
 
@@ -2850,6 +2860,8 @@ def with_dependencies(dependencies, output_tensor, name=None):
   Raises:
     TypeError: if `output_tensor` is not a `Tensor` or `IndexedSlices`.
   """
+  if context.in_eager_mode():
+    return output_tensor
   with ops.name_scope(name, "control_dependency",
                       list(dependencies) + [output_tensor]) as name:
     with ops.colocate_with(output_tensor):
@@ -2962,6 +2974,8 @@ def tuple(tensors, name=None, control_inputs=None):
       objects.
 
   """
+  if context.in_eager_mode():
+    return tensors
   with ops.name_scope(name, "tuple", tensors) as name:
     gating_ops = [t.op for t in tensors if t is not None]
     if control_inputs:
@@ -3087,12 +3101,18 @@ def case(pred_fn_pairs, default=None, exclusive=False, strict=False,
     TypeError: If `pred_fn_pairs` is a list but does not contain 2-tuples.
     TypeError: If `fns[i]` is not callable for any i, or `default` is not
                callable.
+    ValueError: If in eager mode and all predicates are false and no
+               default is provided.
+    ValueError: If in eager mode and is passed a dictionary.
   """
   pfp = pred_fn_pairs  # For readability
   if not (isinstance(pfp, list) or isinstance(pfp, _basetuple)
           or isinstance(pfp, dict)):
     raise TypeError("fns must be a list, tuple, or dict")
   if isinstance(pfp, dict):
+    if context.in_eager_mode():
+      raise ValueError(
+          "In eager mode the predicates must be a list, not a dictionary.")
     if isinstance(pfp, collections.OrderedDict):
       pfp = pfp.items()
     else:
@@ -3113,6 +3133,14 @@ def case(pred_fn_pairs, default=None, exclusive=False, strict=False,
   if default is not None and not callable(default):
     raise TypeError("default must be callable.")
 
+  if context.in_eager_mode():
+    for pred, fn in pfp:
+      if pred:
+        return fn()
+    if default is None:
+      raise ValueError("tf.case received all false predicates and no default.")
+    return default()
+
   preds, fns = map(list, zip(*pfp))
   del pfp  # From now on, preds and fns form the source of truth.
 
-- 
GitLab


From 822a9208fcad065c632f5dac7f6efc0686b5892b Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Tue, 29 Aug 2017 18:50:46 -0700
Subject: [PATCH 090/294] Guards references to tensor.op to avoid calling them
 in eager mode.

PiperOrigin-RevId: 166937613
---
 tensorflow/python/BUILD                   | 23 ++++++--
 tensorflow/python/ops/check_ops.py        |  9 ++-
 tensorflow/python/ops/data_flow_ops.py    | 72 +++++++++++++++--------
 tensorflow/python/ops/linalg_grad.py      |  2 +-
 tensorflow/python/ops/lookup_ops.py       |  7 ++-
 tensorflow/python/ops/math_grad.py        |  4 +-
 tensorflow/python/ops/math_ops.py         |  7 ++-
 tensorflow/python/ops/nn_grad.py          |  2 +-
 tensorflow/python/ops/state_ops.py        |  3 +-
 tensorflow/python/ops/tensor_array_ops.py |  5 +-
 10 files changed, 94 insertions(+), 40 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 28c3eef57b..89f0ab48a0 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1401,9 +1401,10 @@ py_library(
     deps = [
         ":array_ops",
         ":control_flow_ops",
-        ":framework",
         ":framework_for_generated_wrappers",
         ":math_ops",
+        ":sparse_tensor",
+        ":tensor_util",
         ":util",
         "//third_party/py/numpy",
     ],
@@ -1492,9 +1493,11 @@ py_library(
         ":array_ops",
         ":control_flow_ops",
         ":data_flow_ops_gen",
-        ":framework",
         ":framework_for_generated_wrappers",
         ":math_ops",
+        ":random_seed",
+        ":tensor_util",
+        "//tensorflow/python/eager:context",
         "@six_archive//:six",
     ],
 )
@@ -1690,10 +1693,14 @@ py_library(
     deps = [
         ":array_ops",
         ":constant_op",
-        ":framework",
+        ":control_flow_ops",
         ":framework_for_generated_wrappers",
         ":lookup_ops_gen",
         ":math_ops",
+        ":sparse_tensor",
+        ":string_ops",
+        ":util",
+        "//tensorflow/python/eager:context",
         "@six_archive//:six",
     ],
 )
@@ -1705,10 +1712,11 @@ py_library(
     deps = [
         ":array_ops",
         ":array_ops_gen",
-        ":framework",
         ":framework_for_generated_wrappers",
         ":math_ops",
         ":math_ops_gen",
+        ":tensor_util",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
     ],
 )
@@ -1735,6 +1743,7 @@ py_library(
         ":state_ops_gen",
         ":tensor_shape",
         ":util",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
     ],
 )
@@ -1805,6 +1814,8 @@ py_library(
         ":nn_ops",
         ":nn_ops_gen",
         ":sparse_ops",
+        ":tensor_util",
+        "//tensorflow/python/eager:context",
     ],
 )
 
@@ -2152,6 +2163,7 @@ py_library(
         ":resource_variable_ops_gen",
         ":state_ops_gen",
         ":tensor_shape",
+        "//tensorflow/python/eager:context",
     ],
 )
 
@@ -2213,6 +2225,7 @@ py_library(
         ":tensor_shape",
         ":tensor_util",
         ":util",
+        "//tensorflow/python/eager:context",
     ],
 )
 
@@ -2221,12 +2234,14 @@ py_library(
     srcs = ["ops/variable_scope.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":array_ops",
         ":dtypes",
         ":framework_ops",
         ":init_ops",
         ":platform",
         ":resource_variable_ops",
         ":tensor_shape",
+        ":util",
         ":variables",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/estimator:util",
diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py
index b3e724be9c..fb48175285 100644
--- a/tensorflow/python/ops/check_ops.py
+++ b/tensorflow/python/ops/check_ops.py
@@ -46,6 +46,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
@@ -861,8 +862,12 @@ def assert_type(tensor, tf_type, message=None, name=None):
   with ops.name_scope(name, 'assert_type', [tensor]):
     tensor = ops.convert_to_tensor(tensor, name='tensor')
     if tensor.dtype != tf_type:
-      raise TypeError(
-          '%s  %s must be of type %s' % (message, tensor.op.name, tf_type))
+      if context.in_graph_mode():
+        raise TypeError(
+            '%s  %s must be of type %s' % (message, tensor.name, tf_type))
+      else:
+        raise TypeError(
+            '%s tensor must be of type %s' % (message, tf_type))
 
     return control_flow_ops.no_op('statically_determined_correct_type')
 
diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py
index 2a2ef5809d..41dd7f1467 100644
--- a/tensorflow/python/ops/data_flow_ops.py
+++ b/tensorflow/python/ops/data_flow_ops.py
@@ -25,6 +25,7 @@ import threading
 
 import six
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes as _dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
@@ -160,7 +161,10 @@ class QueueBase(object):
     else:
       self._names = None
     self._queue_ref = queue_ref
-    self._name = self._queue_ref.op.name.split("/")[-1]
+    if context.in_graph_mode():
+      self._name = self._queue_ref.op.name.split("/")[-1]
+    else:
+      self._name = context.context().scope_name
 
   @staticmethod
   def from_list(index, queues):
@@ -208,7 +212,9 @@ class QueueBase(object):
   @property
   def name(self):
     """The name of the underlying queue."""
-    return self._queue_ref.op.name
+    if context.in_graph_mode():
+      return self._queue_ref.op.name
+    return self._name
 
   @property
   def dtypes(self):
@@ -419,9 +425,10 @@ class QueueBase(object):
 
     # NOTE(mrry): Not using a shape function because we need access to
     # the `QueueBase` object.
-    op = ret[0].op
-    for output, shape in zip(op.values(), self._shapes):
-      output.set_shape(shape)
+    if context.in_graph_mode():
+      op = ret[0].op
+      for output, shape in zip(op.values(), self._shapes):
+        output.set_shape(shape)
 
     return self._dequeue_return_value(ret)
 
@@ -458,10 +465,13 @@ class QueueBase(object):
 
     # NOTE(mrry): Not using a shape function because we need access to
     # the Queue object.
-    op = ret[0].op
-    batch_dim = tensor_shape.Dimension(tensor_util.constant_value(op.inputs[1]))
-    for output, shape in zip(op.values(), self._shapes):
-      output.set_shape(tensor_shape.TensorShape([batch_dim]).concatenate(shape))
+    if context.in_graph_mode():
+      op = ret[0].op
+      batch_dim = tensor_shape.Dimension(
+          tensor_util.constant_value(op.inputs[1]))
+      for output, shape in zip(op.values(), self._shapes):
+        output.set_shape(
+            tensor_shape.TensorShape([batch_dim]).concatenate(shape))
 
     return self._dequeue_return_value(ret)
 
@@ -499,9 +509,10 @@ class QueueBase(object):
 
     # NOTE(mrry): Not using a shape function because we need access to
     # the Queue object.
-    op = ret[0].op
-    for output, shape in zip(op.values(), self._shapes):
-      output.set_shape(tensor_shape.TensorShape([None]).concatenate(shape))
+    if context.in_graph_mode():
+      op = ret[0].op
+      for output, shape in zip(op.values(), self._shapes):
+        output.set_shape(tensor_shape.TensorShape([None]).concatenate(shape))
 
     return self._dequeue_return_value(ret)
 
@@ -897,7 +908,10 @@ class Barrier(object):
     self._barrier_ref = gen_data_flow_ops._barrier(
         component_types=self._types, shapes=self._shapes,
         shared_name=shared_name, name=name)
-    self._name = self._barrier_ref.op.name.split("/")[-1]
+    if context.in_graph_mode():
+      self._name = self._barrier_ref.op.name.split("/")[-1]
+    else:
+      self._name = context.context().scope_name
 
   @property
   def barrier_ref(self):
@@ -907,7 +921,9 @@ class Barrier(object):
   @property
   def name(self):
     """The name of the underlying barrier."""
-    return self._barrier_ref.op.name
+    if context.in_graph_mode():
+      return self._barrier_ref.op.name
+    return self._name
 
   def insert_many(self, component_index, keys, values, name=None):
     """For each key, assigns the respective value to the specified component.
@@ -984,16 +1000,19 @@ class Barrier(object):
 
     # NOTE(mrry): Not using a shape function because we need access to
     # the Barrier object.
-    op = ret[0].op
-    if allow_small_batch:
-      batch_dim = None
-    else:
-      batch_dim = tensor_shape.Dimension(
-          tensor_util.constant_value(op.inputs[1]))
-    op.outputs[0].set_shape(tensor_shape.vector(batch_dim))  # indices
-    op.outputs[1].set_shape(tensor_shape.vector(batch_dim))  # keys
-    for output, shape in zip(op.outputs[2:], self._shapes):  # value_list
-      output.set_shape(tensor_shape.TensorShape([batch_dim]).concatenate(shape))
+    if context.in_graph_mode():
+      op = ret[0].op
+      if allow_small_batch:
+        batch_dim = None
+      else:
+        batch_dim = tensor_shape.Dimension(
+            tensor_util.constant_value(op.inputs[1]))
+      op.outputs[0].set_shape(tensor_shape.vector(batch_dim))  # indices
+      op.outputs[1].set_shape(tensor_shape.vector(batch_dim))  # keys
+      for output, shape in zip(op.outputs[2:], self._shapes):  # value_list
+        output.set_shape(
+            tensor_shape.TensorShape([batch_dim]).concatenate(
+                shape))
 
     return ret
 
@@ -1081,7 +1100,10 @@ class ConditionalAccumulatorBase(object):
     else:
       self._shape = tensor_shape.unknown_shape()
     self._accumulator_ref = accumulator_ref
-    self._name = self._accumulator_ref.op.name.split("/")[-1]
+    if context.in_graph_mode():
+      self._name = self._accumulator_ref.op.name.split("/")[-1]
+    else:
+      self._name = context.context().scope_name
 
   @property
   def accumulator_ref(self):
diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py
index 9cc05e7a6a..460bf3f3f8 100644
--- a/tensorflow/python/ops/linalg_grad.py
+++ b/tensorflow/python/ops/linalg_grad.py
@@ -203,7 +203,7 @@ def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
   compute_v = op.get_attr("compute_v")
   # a = op.inputs[0], which satisfies
   # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
-  with ops.control_dependencies([grad_e.op, grad_v.op]):
+  with ops.control_dependencies([grad_e, grad_v]):
     if compute_v:
       v = op.outputs[1]
       # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index ab9e3b0af9..f9b1733dda 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import collections
 import functools
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -152,9 +153,13 @@ class InitializableLookupTableBase(LookupInterface):
       default_value: The value to use if a key is missing in the table.
       initializer: The table initializer to use.
     """
+    if context.in_graph_mode():
+      name = table_ref.op.name.split("/")[-1]
+    else:
+      name = context.context().scope_name
     super(InitializableLookupTableBase,
           self).__init__(initializer.key_dtype, initializer.value_dtype,
-                         table_ref.op.name.split("/")[-1])
+                         name)
     self._table_ref = table_ref
     self._default_value = ops.convert_to_tensor(
         default_value, dtype=self._value_dtype)
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index bf8dc17298..4175f6ec44 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -909,7 +909,7 @@ def _SparseMatMulGrad(op, grad):
       op.inputs[0]: op.get_attr("a_is_sparse"),
       op.inputs[1]: op.get_attr("b_is_sparse"),
       # Use heuristic to figure out if grad might be sparse
-      grad: (grad.op.type == "ReluGrad")
+      grad: context.in_graph_mode() and (grad.op.type == "ReluGrad")
   }
 
   def _SparseMatMul(t1, t2, out_dtype, transpose_a=False, transpose_b=False):
@@ -1033,7 +1033,7 @@ def _ImagGrad(_, grad):
 def _AngleGrad(op, grad):
   """Returns -grad / (Im(x) + iRe(x))"""
   x = op.inputs[0]
-  with ops.control_dependencies([grad.op]):
+  with ops.control_dependencies([grad]):
     re = math_ops.real(x)
     im = math_ops.imag(x)
     z = math_ops.reciprocal(math_ops.complex(im, re))
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index 83f854630e..6559929560 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -148,6 +148,7 @@ from autograd import core as ag_core
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import common_shapes
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -2041,6 +2042,10 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
     ValueError: If `inputs` don't all have same shape and dtype or the shape
     cannot be inferred.
   """
+  if context.in_eager_mode():
+    # TODO(apassos) remove this once the lifetime of eager variables gets
+    # addressed.
+    raise ValueError("accumulate_n not supported in eager mode")
   if not inputs or not isinstance(inputs, (list, tuple)):
     raise ValueError("inputs must be a list of at least one Tensor with the "
                      "same dtype and shape")
@@ -2062,7 +2067,7 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None):
     tensor_dtype = inputs[0].dtype
   if tensor_dtype != inputs[0].dtype:
     raise TypeError("tensor_dtype is {}, but input is of type {}"
-                     .format(tensor_dtype, inputs[0].dtype))
+                    .format(tensor_dtype, inputs[0].dtype))
   if len(inputs) == 1:
     return inputs[0]
   with ops.name_scope(name, "AccumulateN", inputs) as name:
diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py
index 45d80b43f1..4ad0603e56 100644
--- a/tensorflow/python/ops/nn_grad.py
+++ b/tensorflow/python/ops/nn_grad.py
@@ -375,7 +375,7 @@ def _SoftplusGradGrad(op, grad):
   #   dx = gen_nn_ops._softplus_grad(dy, x) = dy / (1 + exp(-x))
   # This op computes (ddy, d2x) from op.inputs == [dy, x] and grad == ddx.
   dy, x = op.inputs
-  with ops.control_dependencies([grad.op]):
+  with ops.control_dependencies([grad]):
     ddy = gen_nn_ops._softplus_grad(grad, x)  # pylint: disable=protected-access
     d2x = grad * dy / (math_ops.exp(-x) + 2.0 + math_ops.exp(x))
     return (ddy, d2x)
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index 684d34b170..f54bbfe90e 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -81,6 +81,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import gen_resource_variable_ops
@@ -183,7 +184,7 @@ def is_variable_initialized(ref, name=None):
   if ref.dtype._is_ref_dtype:
     return gen_state_ops.is_variable_initialized(ref=ref, name=name)
   # Handle resource variables.
-  if ref.op.type == "VarHandleOp":
+  if context.in_eager_mode() or ref.op.type == "VarHandleOp":
     return gen_resource_variable_ops.var_is_initialized_op(ref.handle,
                                                            name=name)
 
diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py
index 20ae082ee1..08325ba771 100644
--- a/tensorflow/python/ops/tensor_array_ops.py
+++ b/tensorflow/python/ops/tensor_array_ops.py
@@ -24,6 +24,7 @@ from __future__ import print_function
 
 import contextlib
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
@@ -445,7 +446,7 @@ class TensorArray(object):
       ta._infer_shape = self._infer_shape
       ta._element_shape = self._element_shape
       ta._colocate_with = self._colocate_with
-      if ta._infer_shape:
+      if ta._infer_shape and context.in_graph_mode():
         val_shape = flow_out.op.inputs[2].get_shape()
         element_shape = tensor_shape.unknown_shape()
         if val_shape.dims is not None:
@@ -487,7 +488,7 @@ class TensorArray(object):
       ta._infer_shape = self._infer_shape
       ta._element_shape = self._element_shape
       ta._colocate_with = self._colocate_with
-      if ta._infer_shape:
+      if ta._infer_shape and context.in_graph_mode():
         val_shape = flow_out.op.inputs[1].get_shape()
         clengths = tensor_util.constant_value(flow_out.op.inputs[2])
         element_shape = tensor_shape.unknown_shape()
-- 
GitLab


From c82d70d383a97045293a59a77e5280c2d3cebaab Mon Sep 17 00:00:00 2001
From: Saurabh Saxena 
Date: Wed, 30 Aug 2017 00:30:26 -0700
Subject: [PATCH 091/294] Add support for saving/restoring states of iterators.
 All iterators that need state saving will have to implement SaveState and
 RestoreState methods. Add SaveIterator and RestoreIterator ops. Add
 implementations for RangeDataset, RepeatDataset and FixedLengthRecordDataset
 and related tests. More to follow in future CLs.

PiperOrigin-RevId: 166959426
---
 .../kernel_tests/range_dataset_op_test.py     | 202 ++++++++++++++++++
 .../kernel_tests/reader_dataset_ops_test.py   | 132 +++++++++++-
 tensorflow/core/kernels/BUILD                 |   1 +
 tensorflow/core/kernels/dataset.cc            |   2 +
 tensorflow/core/kernels/dataset.h             | 129 +++++++++++
 tensorflow/core/kernels/iterator_ops.cc       |  55 +++++
 tensorflow/core/kernels/range_dataset_op.cc   |  20 +-
 tensorflow/core/kernels/reader_dataset_ops.cc |  46 ++++
 tensorflow/core/kernels/repeat_dataset_op.cc  |  18 ++
 tensorflow/core/ops/dataset_ops.cc            |  18 ++
 10 files changed, 615 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
index a8edbbd20c..87bab43ccf 100644
--- a/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/range_dataset_op_test.py
@@ -17,17 +17,29 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 from tensorflow.contrib.data.python.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 
 
 class RangeDatasetTest(test.TestCase):
 
+  def tearDown(self):
+    # Remove all checkpoint files.
+    prefix = self._iterator_checkpoint_prefix()
+    pattern = prefix + "*"
+    files = gfile.Glob(pattern)
+    map(gfile.Remove, files)
+
   def testStop(self):
     stop = array_ops.placeholder(dtypes.int64, shape=[])
     iterator = dataset_ops.Dataset.range(stop).make_initializable_iterator()
@@ -175,6 +187,196 @@ class RangeDatasetTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  def _iterator_checkpoint_prefix(self):
+    return os.path.join(self.get_temp_dir(), "iterator")
+
+  def testSaveRestore(self):
+
+    def _build_graph(start, stop):
+      iterator = dataset_ops.Dataset.range(start,
+                                           stop).make_initializable_iterator()
+      init_op = iterator.initializer
+      get_next = iterator.get_next()
+      path = self._iterator_checkpoint_prefix()
+      save_op = gen_dataset_ops.save_iterator(iterator._iterator_resource, path)
+      restore_op = gen_dataset_ops.restore_iterator(iterator._iterator_resource,
+                                                    path)
+      return init_op, get_next, save_op, restore_op
+
+    # Saving and restoring in different sessions.
+    start = 2
+    stop = 10
+    break_point = 5
+    with ops.Graph().as_default() as g:
+      init_op, get_next, save_op, _ = _build_graph(start, stop)
+      with self.test_session(graph=g) as sess:
+        sess.run(variables.global_variables_initializer())
+        sess.run(init_op)
+        for i in range(start, break_point):
+          self.assertEqual(i, sess.run(get_next))
+        sess.run(save_op)
+
+    with ops.Graph().as_default() as g:
+      init_op, get_next, _, restore_op = _build_graph(start, stop)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        sess.run(restore_op)
+        for i in range(break_point, stop):
+          self.assertEqual(i, sess.run(get_next))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+
+    # Saving and restoring in same session.
+    with ops.Graph().as_default() as g:
+      init_op, get_next, save_op, restore_op = _build_graph(start, stop)
+      with self.test_session(graph=g) as sess:
+        sess.run(variables.global_variables_initializer())
+        sess.run(init_op)
+        for i in range(start, break_point):
+          self.assertEqual(i, sess.run(get_next))
+        sess.run(save_op)
+        sess.run(restore_op)
+        for i in range(break_point, stop):
+          self.assertEqual(i, sess.run(get_next))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+
+  def testMultipleSaves(self):
+
+    def _build_graph(start, stop):
+      iterator = dataset_ops.Dataset.range(start,
+                                           stop).make_initializable_iterator()
+      init_op = iterator.initializer
+      get_next = iterator.get_next()
+      path = self._iterator_checkpoint_prefix()
+      save_op = gen_dataset_ops.save_iterator(iterator._iterator_resource, path)
+      restore_op = gen_dataset_ops.restore_iterator(iterator._iterator_resource,
+                                                    path)
+      return init_op, get_next, save_op, restore_op
+
+    start = 2
+    stop = 10
+    break_point1 = 5
+    break_point2 = 7
+
+    with ops.Graph().as_default() as g:
+      init_op, get_next, save_op, _ = _build_graph(start, stop)
+      with self.test_session(graph=g) as sess:
+        sess.run(variables.global_variables_initializer())
+        sess.run(init_op)
+        for i in range(start, break_point1):
+          self.assertEqual(i, sess.run(get_next))
+        sess.run(save_op)
+
+    with ops.Graph().as_default() as g:
+      init_op, get_next, save_op, restore_op = _build_graph(start, stop)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        sess.run(restore_op)
+        for i in range(break_point1, break_point2):
+          self.assertEqual(i, sess.run(get_next))
+        sess.run(save_op)
+
+    break_point2 = 7
+    with ops.Graph().as_default() as g:
+      init_op, get_next, save_op, restore_op = _build_graph(start, stop)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        sess.run(restore_op)
+        for i in range(break_point2, stop):
+          self.assertEqual(i, sess.run(get_next))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+
+  def testSaveRestoreWithRepeat(self):
+
+    def _build_graph(start, stop, num_epochs):
+      iterator = dataset_ops.Dataset.range(
+          start, stop).repeat(num_epochs).make_initializable_iterator()
+      init_op = iterator.initializer
+      get_next = iterator.get_next()
+      path = self._iterator_checkpoint_prefix()
+      save_op = gen_dataset_ops.save_iterator(iterator._iterator_resource, path)
+      restore_op = gen_dataset_ops.restore_iterator(iterator._iterator_resource,
+                                                    path)
+      return init_op, get_next, save_op, restore_op
+
+    start = 2
+    stop = 10
+    num_epochs = 5
+    break_range = 5
+    break_epoch = 3
+    with ops.Graph().as_default() as g:
+      init_op, get_next, save_op, restore_op = _build_graph(
+          start, stop, num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(variables.global_variables_initializer())
+        sess.run(init_op)
+        # Note: There is no checkpoint saved currently so a NotFoundError is
+        # raised.
+        with self.assertRaises(errors.NotFoundError):
+          sess.run(restore_op)
+        for _ in range(break_epoch - 1):
+          for i in range(start, stop):
+            self.assertEqual(i, sess.run(get_next))
+        for i in range(start, break_range):
+          self.assertEqual(i, sess.run(get_next))
+        sess.run(save_op)
+
+    with ops.Graph().as_default() as g:
+      init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        sess.run(restore_op)
+        for i in range(break_range, stop):
+          self.assertEqual(i, sess.run(get_next))
+        for _ in range(break_epoch, num_epochs):
+          for i in range(start, stop):
+            self.assertEqual(i, sess.run(get_next))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+
+  def testSaveRestoreExhaustedIterator(self):
+
+    def _build_graph(start, stop, num_epochs):
+      iterator = dataset_ops.Dataset.range(
+          start, stop).repeat(num_epochs).make_initializable_iterator()
+      init_op = iterator.initializer
+      get_next = iterator.get_next()
+      path = self._iterator_checkpoint_prefix()
+      save_op = gen_dataset_ops.save_iterator(iterator._iterator_resource, path)
+      restore_op = gen_dataset_ops.restore_iterator(iterator._iterator_resource,
+                                                    path)
+      return init_op, get_next, save_op, restore_op
+
+    start = 2
+    stop = 10
+    num_epochs = 5
+    with ops.Graph().as_default() as g:
+      init_op, get_next, save_op, restore_op = _build_graph(
+          start, stop, num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(variables.global_variables_initializer())
+        sess.run(init_op)
+        # Note: There is no checkpoint saved currently so a NotFoundError is
+        # raised.
+        with self.assertRaises(errors.NotFoundError):
+          sess.run(restore_op)
+        for _ in range(num_epochs):
+          for i in range(start, stop):
+            self.assertEqual(i, sess.run(get_next))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+        sess.run(save_op)
+
+    with ops.Graph().as_default() as g:
+      init_op, get_next, _, restore_op = _build_graph(start, stop, num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        sess.run(restore_op)
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
index 72a3ff1789..d631fbc76e 100644
--- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.lib.io import python_io
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.platform import test
 from tensorflow.python.util import compat
@@ -255,7 +256,6 @@ class FixedLengthRecordReaderTest(test.TestCase):
 
   def testFixedLengthRecordDatasetBuffering(self):
     test_filenames = self._createFiles()
-
     dataset = dataset_ops.FixedLengthRecordDataset(
         test_filenames,
         self._record_bytes,
@@ -271,6 +271,124 @@ class FixedLengthRecordReaderTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(iterator.get_next())
 
+  def _build_iterator_graph(self, num_epochs):
+    filenames = self._createFiles()
+    path = os.path.join(self.get_temp_dir(), "iterator")
+    dataset = (dataset_ops.FixedLengthRecordDataset(
+        filenames, self._record_bytes, self._header_bytes, self._footer_bytes)
+               .repeat(num_epochs))
+    iterator = dataset.make_initializable_iterator()
+    init_op = iterator.initializer
+    get_next_op = iterator.get_next()
+    save_op = gen_dataset_ops.save_iterator(iterator._iterator_resource, path)
+    restore_op = gen_dataset_ops.restore_iterator(iterator._iterator_resource,
+                                                  path)
+    return init_op, get_next_op, save_op, restore_op
+
+  def testSaveRestore(self):
+    num_epochs = 10
+    epoch_break = 5
+    file_break = self._num_files // 2
+    record_break = self._num_records // 2
+
+    with ops.Graph().as_default() as g:
+      init_op, get_next_op, save_op, restore_op = self._build_iterator_graph(
+          num_epochs=num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        # Note: There is no checkpoint saved currently so a NotFoundError is
+        # raised.
+        with self.assertRaises(errors.NotFoundError):
+          sess.run(restore_op)
+        for epoch in range(num_epochs):
+          for f in range(self._num_files):
+            for r in range(self._num_records):
+              if (epoch == epoch_break and f == file_break and
+                  r == record_break):
+                sess.run(save_op)
+                break
+              self.assertEqual(self._record(f, r), sess.run(get_next_op))
+            else:
+              continue
+            break
+          else:
+            continue
+          break
+        else:
+          with self.assertRaises(errors.OutOfRangeError):
+            sess.run(get_next_op)
+
+    with ops.Graph().as_default() as g:
+      init_op, get_next_op, save_op, restore_op = self._build_iterator_graph(
+          num_epochs=num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        sess.run(restore_op)
+        for epoch in range(num_epochs):
+          for f in range(self._num_files):
+            for r in range(self._num_records):
+              if (epoch < epoch_break or
+                  (epoch == epoch_break and f < file_break) or
+                  (epoch == epoch_break and f == file_break and
+                   r < record_break)):
+                continue
+              self.assertEqual(self._record(f, r), sess.run(get_next_op))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next_op)
+
+  def testRestoreUnusedIterator(self):
+    num_epochs = 10
+    with ops.Graph().as_default() as g:
+      init_op, get_next_op, save_op, restore_op = self._build_iterator_graph(
+          num_epochs=num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        # Note: There is no checkpoint saved currently so a NotFoundError is
+        # raised.
+        with self.assertRaises(errors.NotFoundError):
+          sess.run(restore_op)
+        # Save unused iterator.
+        sess.run(save_op)
+    with ops.Graph().as_default() as g:
+      init_op, get_next_op, save_op, restore_op = self._build_iterator_graph(
+          num_epochs=num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        sess.run(restore_op)
+        for _ in range(num_epochs * self._num_files * self._num_records):
+          sess.run(get_next_op)
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next_op)
+
+  def testRestoreExhaustedIterator(self):
+    num_epochs = 10
+
+    with ops.Graph().as_default() as g:
+      init_op, get_next_op, save_op, restore_op = self._build_iterator_graph(
+          num_epochs=num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        # Note: There is no checkpoint saved currently so a NotFoundError is
+        # raised.
+        with self.assertRaises(errors.NotFoundError):
+          sess.run(restore_op)
+        for _ in range(num_epochs):
+          for f in range(self._num_files):
+            for r in range(self._num_records):
+              self.assertEqual(self._record(f, r), sess.run(get_next_op))
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next_op)
+        sess.run(save_op)
+
+    with ops.Graph().as_default() as g:
+      init_op, get_next_op, save_op, restore_op = self._build_iterator_graph(
+          num_epochs=num_epochs)
+      with self.test_session(graph=g) as sess:
+        sess.run(init_op)
+        sess.run(restore_op)
+        with self.assertRaises(errors.OutOfRangeError):
+          sess.run(get_next_op)
+
 
 class TFRecordDatasetTest(test.TestCase):
 
@@ -558,8 +676,8 @@ class ReadBatchFeaturesTest(test.TestCase):
   def testRead(self):
     for batch_size in [1, 2]:
       for num_epochs in [1, 10]:
-        with ops.Graph().as_default():
-          with self.test_session(graph=ops.get_default_graph()) as sess:
+        with ops.Graph().as_default() as g:
+          with self.test_session(graph=g) as sess:
             # Basic test: read from file 0.
             self.outputs = self._read_batch_features(
                 filenames=self.test_filenames[0],
@@ -569,8 +687,8 @@ class ReadBatchFeaturesTest(test.TestCase):
             with self.assertRaises(errors.OutOfRangeError):
               self._next_actual_batch(sess)
 
-        with ops.Graph().as_default():
-          with self.test_session(graph=ops.get_default_graph()) as sess:
+        with ops.Graph().as_default() as g:
+          with self.test_session(graph=g) as sess:
             # Basic test: read from file 1.
             self.outputs = self._read_batch_features(
                 filenames=self.test_filenames[1],
@@ -580,8 +698,8 @@ class ReadBatchFeaturesTest(test.TestCase):
             with self.assertRaises(errors.OutOfRangeError):
               self._next_actual_batch(sess)
 
-        with ops.Graph().as_default():
-          with self.test_session(graph=ops.get_default_graph()) as sess:
+        with ops.Graph().as_default() as g:
+          with self.test_session(graph=g) as sess:
             # Basic test: read from both files.
             self.outputs = self._read_batch_features(
                 filenames=self.test_filenames,
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 79a7b8a8b9..0893a01204 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -5568,6 +5568,7 @@ cc_library(
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
+        "//tensorflow/core/util/tensor_bundle",
     ],
 )
 
diff --git a/tensorflow/core/kernels/dataset.cc b/tensorflow/core/kernels/dataset.cc
index f99684b1ca..2bfbdc1cd9 100644
--- a/tensorflow/core/kernels/dataset.cc
+++ b/tensorflow/core/kernels/dataset.cc
@@ -52,4 +52,6 @@ void BinaryDatasetOpKernel::MakeDataset(OpKernelContext* ctx,
   MakeDataset(ctx, input, another_input, output);
 }
 
+const char IteratorBase::kIteratorExhausted[] = "ITERATOR_EXHAUSTED";
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h
index 9bfc5c1e96..aa97d34041 100644
--- a/tensorflow/core/kernels/dataset.h
+++ b/tensorflow/core/kernels/dataset.h
@@ -19,7 +19,11 @@ limitations under the License.
 
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/tracing.h"
+#include "tensorflow/core/util/tensor_bundle/naming.h"
+#include "tensorflow/core/util/tensor_bundle/tensor_bundle.h"
 
 // Polymorphic datasets should support all primitive TensorFlow
 // types. Use this macro to expand `m(T)` once for each primitive type
@@ -86,6 +90,10 @@ class IteratorContext {
 // range of outputs is typically represented by an `DatasetBase`,
 // defined below.
 class IteratorBase {
+ protected:
+  class IteratorBundleReader;
+  class IteratorBundleWriter;
+
  public:
   virtual ~IteratorBase() {}
 
@@ -115,6 +123,118 @@ class IteratorBase {
   // (and possibly partially defined) shapes of each tuple component
   // in the outputs of this iterator.
   virtual const std::vector& output_shapes() const = 0;
+
+  // Saves the state of this iterator.
+  virtual Status SaveState(OpKernelContext* ctx, StringPiece path) {
+    BundleWriter bundle_writer(ctx->env(), path);
+    IteratorBundleWriter writer(&bundle_writer);
+    if (is_exhausted_) {
+      LOG(INFO) << "Iterator exhausted. Nothing to save.";
+      TF_RETURN_IF_ERROR(
+          writer.WriteScalar(kIteratorExhausted, kIteratorExhausted));
+    } else {
+      TF_RETURN_IF_ERROR(SaveStateInternal(ctx, &writer));
+    }
+    TF_RETURN_IF_ERROR(bundle_writer.Finish());
+    return Status::OK();
+  }
+
+  // Restores the state of this iterator.
+  virtual Status RestoreState(OpKernelContext* ctx, StringPiece& path) {
+    if (!(ctx->env()->FileExists(MetaFilename(path)).ok())) {
+      return errors::NotFound(
+          "Failed to restore Iterator state. No file found at ",
+          MetaFilename(path));
+    }
+    BundleReader bundle_reader(ctx->env(), path);
+    if (bundle_reader.Contains(kIteratorExhausted)) {
+      LOG(INFO) << "Iterator exhausted. Nothing to restore.";
+      is_exhausted_ = true;
+      return Status::OK();
+    } else {
+      IteratorBundleReader reader(&bundle_reader);
+      return RestoreStateInternal(ctx, &reader);
+    }
+  }
+
+ protected:
+  class IteratorBundleReader {
+   public:
+    IteratorBundleReader(BundleReader* bundle_reader)
+        : bundle_reader_(bundle_reader) {}
+
+    // Reads a scalar value.
+    template 
+    Status ReadScalar(T* val, const string& key) {
+      Tensor val_t = Tensor(DataTypeToEnum::v(), TensorShape({}));
+      TF_RETURN_IF_ERROR(Lookup(StringPiece(key), &val_t));
+      *val = val_t.scalar()();
+      return Status::OK();
+    }
+
+    // Restores the state of a parent iterator recursively.
+    Status RestoreParentState(OpKernelContext* ctx,
+                              const std::unique_ptr& parent) {
+      return parent->RestoreStateInternal(ctx, this);
+    }
+
+   private:
+    Status Lookup(StringPiece key, Tensor* val) {
+      return bundle_reader_->Lookup(key, val);
+    }
+
+    BundleReader* bundle_reader_;
+  };
+
+  class IteratorBundleWriter {
+   public:
+    IteratorBundleWriter(BundleWriter* bundle_writer)
+        : bundle_writer_(bundle_writer) {}
+
+    // Writes a scalar value.
+    template 
+    Status WriteScalar(const T val, const string& key) {
+      Tensor val_t = Tensor(DataTypeToEnum::v(), TensorShape({}));
+      val_t.scalar()() = val;
+      TF_RETURN_IF_ERROR(Add(StringPiece(key), val_t));
+      return Status::OK();
+    }
+
+    // Saves the state of a parent iterator recursively.
+    Status SaveParentState(OpKernelContext* ctx,
+                           const std::unique_ptr& parent) {
+      return parent->SaveStateInternal(ctx, this);
+    }
+
+   private:
+    Status Add(StringPiece key, const Tensor& val) {
+      return bundle_writer_->Add(key, val);
+    }
+
+    BundleWriter* bundle_writer_;
+  };
+
+  // Saves the state of this iterator.
+  // Note: Contents written to `writer` may not get flushed to disk
+  // until the call to `SaveState` in the leaf iterator is finished.
+  // Must be overridden by sub-classes.
+  virtual Status SaveStateInternal(OpKernelContext* ctx,
+                                   IteratorBundleWriter* writer) {
+    return errors::Unimplemented("SaveState not implemented.");
+  }
+
+  // Restores the state of this iterator.
+  //
+  // Must be overridden by sub-classes.
+  virtual Status RestoreStateInternal(OpKernelContext* ctx,
+                                      IteratorBundleReader* reader) {
+    return errors::Unimplemented("RestoreState not implemented");
+  }
+
+  bool is_exhausted_ = false;  // Whether the iterator has been exhausted.
+
+ private:
+  static const char kIteratorExhausted[];
 };
 
 // Represents a (potentially infinite) range of outputs, where each
@@ -182,6 +302,10 @@ class DatasetIterator : public IteratorBase {
   Status GetNext(IteratorContext* ctx, std::vector* out_tensors,
                  bool* end_of_sequence) final {
     port::Tracing::TraceMe activity(params_.prefix);
+    if (is_exhausted_) {
+      *end_of_sequence = true;
+      return Status::OK();
+    }
     return GetNextInternal(ctx, out_tensors, end_of_sequence);
   }
 
@@ -190,6 +314,11 @@ class DatasetIterator : public IteratorBase {
                                  std::vector* out_tensors,
                                  bool* end_of_sequence) = 0;
 
+ protected:
+  string full_name(const string& name) {
+    return strings::StrCat(prefix(), ":", name);
+  }
+
  private:
   Params params_;
 };
diff --git a/tensorflow/core/kernels/iterator_ops.cc b/tensorflow/core/kernels/iterator_ops.cc
index c0e4f91991..7f0e11872a 100644
--- a/tensorflow/core/kernels/iterator_ops.cc
+++ b/tensorflow/core/kernels/iterator_ops.cc
@@ -89,6 +89,31 @@ class IteratorResource : public ResourceBase {
     }
   }
 
+  Status SaveState(OpKernelContext* ctx, StringPiece path) {
+    std::shared_ptr captured_iterator(iterator_);
+    if (captured_iterator) {
+      return captured_iterator->SaveState(ctx, path);
+    } else {
+      return errors::FailedPrecondition(
+          "SaveState() failed because the iterator has not been initialized. "
+          "Ensure that you have run the initializer operation for this "
+          "iterator before getting the next element.");
+    }
+  }
+
+  Status RestoreState(OpKernelContext* ctx, StringPiece path) {
+    std::shared_ptr captured_iterator(iterator_);
+    if (captured_iterator) {
+      return captured_iterator->RestoreState(ctx, path);
+    } else {
+      return errors::FailedPrecondition(
+          "RestoreState() failed because the iterator has not been "
+          "initialized. "
+          "Ensure that you have run the initializer operation for this "
+          "iterator before getting the next element.");
+    }
+  }
+
   // Transfers ownership of iterator to this. This method is thread-safe.
   Status set_iterator(std::unique_ptr iterator) {
     if (iterator) {
@@ -161,6 +186,32 @@ class MakeIteratorOp : public OpKernel {
   }
 };
 
+class SaveIteratorOp : public OpKernel {
+ public:
+  explicit SaveIteratorOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    IteratorResource* iterator_resource;
+    OP_REQUIRES_OK(
+        ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator_resource));
+    const string& path = ctx->input(1).scalar()();
+    OP_REQUIRES_OK(ctx, iterator_resource->SaveState(ctx, path));
+  }
+};
+
+class RestoreIteratorOp : public OpKernel {
+ public:
+  explicit RestoreIteratorOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    IteratorResource* iterator_resource;
+    OP_REQUIRES_OK(
+        ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator_resource));
+    const string& path = ctx->input(1).scalar()();
+    OP_REQUIRES_OK(ctx, iterator_resource->RestoreState(ctx, path));
+  }
+};
+
 class OneShotIteratorOp : public AsyncOpKernel {
  public:
   explicit OneShotIteratorOp(OpKernelConstruction* ctx)
@@ -504,6 +555,10 @@ class IteratorFromStringHandleOp : public OpKernel {
 REGISTER_KERNEL_BUILDER(Name("Iterator").Device(DEVICE_CPU), IteratorHandleOp);
 REGISTER_KERNEL_BUILDER(Name("MakeIterator").Device(DEVICE_CPU),
                         MakeIteratorOp);
+REGISTER_KERNEL_BUILDER(Name("SaveIterator").Device(DEVICE_CPU),
+                        SaveIteratorOp);
+REGISTER_KERNEL_BUILDER(Name("RestoreIterator").Device(DEVICE_CPU),
+                        RestoreIteratorOp);
 REGISTER_KERNEL_BUILDER(Name("OneShotIterator").Device(DEVICE_CPU),
                         OneShotIteratorOp);
 REGISTER_KERNEL_BUILDER(Name("IteratorGetNext").Device(DEVICE_CPU),
diff --git a/tensorflow/core/kernels/range_dataset_op.cc b/tensorflow/core/kernels/range_dataset_op.cc
index a32a02f57d..9976c55838 100644
--- a/tensorflow/core/kernels/range_dataset_op.cc
+++ b/tensorflow/core/kernels/range_dataset_op.cc
@@ -86,6 +86,7 @@ class RangeDatasetOp : public DatasetOpKernel {
         if ((dataset()->step_ > 0 && next_ >= dataset()->stop_) ||
             (dataset()->step_ < 0 && next_ <= dataset()->stop_)) {
           *end_of_sequence = true;
+          is_exhausted_ = true;
           return Status::OK();
         }
         Tensor value_tensor(cpu_allocator(), DT_INT64, {});
@@ -97,9 +98,26 @@ class RangeDatasetOp : public DatasetOpKernel {
         return Status::OK();
       }
 
+     protected:
+      Status SaveStateInternal(OpKernelContext* ctx,
+                               IteratorBundleWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(next_, full_name("next")));
+        return Status::OK();
+      }
+
+      Status RestoreStateInternal(OpKernelContext* ctx,
+                                  IteratorBundleReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(&next_, full_name("next")));
+        return Status::OK();
+      }
+
      private:
       mutex mu_;
-      int64 next_;
+      int64 next_ GUARDED_BY(mu_);
     };
 
     const int64 start_;
diff --git a/tensorflow/core/kernels/reader_dataset_ops.cc b/tensorflow/core/kernels/reader_dataset_ops.cc
index 407f69cde7..73fc09abc8 100644
--- a/tensorflow/core/kernels/reader_dataset_ops.cc
+++ b/tensorflow/core/kernels/reader_dataset_ops.cc
@@ -315,6 +315,7 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
           // Iteration ends when there are no more files to process.
           if (current_file_index_ == dataset()->filenames_.size()) {
             *end_of_sequence = true;
+            is_exhausted_ = true;
             return Status::OK();
           }
 
@@ -332,6 +333,51 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
         } while (true);
       }
 
+     protected:
+      Status SaveStateInternal(OpKernelContext* ctx,
+                               IteratorBundleWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(writer->WriteScalar(
+            current_file_index_, full_name("current_file_index")));
+
+        // `input_buffer_` is empty if
+        // 1. GetNext has not been called even once.
+        // 2. All files have been read and iterator has been exhausted.
+        int64 current_pos = input_buffer_ ? input_buffer_->Tell() : -1;
+        TF_RETURN_IF_ERROR(
+            writer->WriteScalar(current_pos, full_name("current_pos")));
+        return Status::OK();
+      }
+
+      Status RestoreStateInternal(OpKernelContext* ctx,
+                                  IteratorBundleReader* reader) override {
+        mutex_lock l(mu_);
+        int64 current_file_index;
+        TF_RETURN_IF_ERROR(reader->ReadScalar(
+            ¤t_file_index, full_name("current_file_index")));
+        current_file_index_ = size_t(current_file_index);
+        int64 current_pos;
+        TF_RETURN_IF_ERROR(
+            reader->ReadScalar(¤t_pos, full_name("current_pos")));
+
+        // Seek to current_pos.
+        input_buffer_.reset();
+        file_.reset();
+        if (current_pos >= 0) {  // There was an active input_buffer_.
+          uint64 file_size;
+          TF_RETURN_IF_ERROR(ctx->env()->GetFileSize(
+              dataset()->filenames_[current_file_index_], &file_size));
+          file_pos_limit_ = file_size - dataset()->footer_bytes_;
+          TF_RETURN_IF_ERROR(ctx->env()->NewRandomAccessFile(
+              dataset()->filenames_[current_file_index_], &file_));
+          input_buffer_.reset(
+              new io::InputBuffer(file_.get(), dataset()->buffer_size_));
+          TF_RETURN_IF_ERROR(input_buffer_->Seek(current_pos));
+        }
+
+        return Status::OK();
+      }
+
      private:
       mutex mu_;
       size_t current_file_index_ GUARDED_BY(mu_) = 0;
diff --git a/tensorflow/core/kernels/repeat_dataset_op.cc b/tensorflow/core/kernels/repeat_dataset_op.cc
index ef17fc8d7d..6ed69ecf2e 100644
--- a/tensorflow/core/kernels/repeat_dataset_op.cc
+++ b/tensorflow/core/kernels/repeat_dataset_op.cc
@@ -107,10 +107,28 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel {
           input_impl_ = dataset()->input_->MakeIterator(prefix());
         }
         *end_of_sequence = true;
+        is_exhausted_ = true;
         input_impl_.reset();
         return Status::OK();
       }
 
+     protected:
+      Status SaveStateInternal(OpKernelContext* ctx,
+                               IteratorBundleWriter* writer) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(writer->WriteScalar(i_, full_name("i")));
+        TF_RETURN_IF_ERROR(writer->SaveParentState(ctx, input_impl_));
+        return Status::OK();
+      }
+
+      Status RestoreStateInternal(OpKernelContext* ctx,
+                                  IteratorBundleReader* reader) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(reader->ReadScalar(&i_, full_name("i")));
+        TF_RETURN_IF_ERROR(reader->RestoreParentState(ctx, input_impl_));
+        return Status::OK();
+      }
+
      private:
       mutex mu_;
       int64 i_ GUARDED_BY(mu_);
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 34b9a2119a..f6bd5768d7 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -499,6 +499,24 @@ This operation may be executed multiple times. Each execution will reset the
 iterator in `iterator` to the first element of `dataset`.
 )doc");
 
+REGISTER_OP("SaveIterator")
+    .Input("iterator: resource")
+    .Input("path: string")
+    .SetShapeFn(shape_inference::NoOutputs)
+    .Doc(R"doc(
+Saves the state of the `iterator` at `path`.
+
+This state can be restored using "RestoreIterator".
+)doc");
+
+REGISTER_OP("RestoreIterator")
+    .Input("iterator: resource")
+    .Input("path: string")
+    .SetShapeFn(shape_inference::NoOutputs)
+    .Doc(R"doc(
+Restores the state of the `iterator` from the checkpoint saved at `path` using "SaveIterator".
+)doc");
+
 REGISTER_OP("OneShotIterator")
     .Output("handle: resource")
     .Attr("dataset_factory: func")
-- 
GitLab


From 9c0302760d482abda065750ddee603e23354cede Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 01:09:11 -0700
Subject: [PATCH 092/294] Update ops-related pbtxt files.

PiperOrigin-RevId: 166962648
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 24 +++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 27 +++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 8a481fadf8..ad290d123e 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -22394,6 +22394,18 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "RestoreIterator"
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "path"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
 op {
   name: "RestoreSlice"
   input_arg {
@@ -23117,6 +23129,18 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "SaveIterator"
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "path"
+    type: DT_STRING
+  }
+  is_stateful: true
+}
 op {
   name: "SaveSlices"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index f5723807f6..13356e1d8a 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -22093,6 +22093,19 @@ op {
   description: "Reads a tensor stored in one or several files. If there are several files (for\ninstance because a tensor was saved as slices), `file_pattern` may contain\nwildcard symbols (`*` and `?`) in the filename portion only, not in the\ndirectory portion.\n\nIf a `file_pattern` matches several files, `preferred_shard` can be used to hint\nin which file the requested tensor is likely to be found. This op will first\nopen the file at index `preferred_shard` in the list of matching files and try\nto restore tensors from that file.  Only if some tensors or tensor slices are\nnot found in that first file, then the Op opens all the files. Setting\n`preferred_shard` to match the value passed as the `shard` input\nof a matching `Save` Op may speed up Restore.  This attribute only affects\nperformance, not correctness.  The default value -1 means files are processed in\norder.\n\nSee also `RestoreSlice`."
   is_stateful: true
 }
+op {
+  name: "RestoreIterator"
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "path"
+    type: DT_STRING
+  }
+  summary: "Restores the state of the `iterator` from the checkpoint saved at `path` using \"SaveIterator\"."
+  is_stateful: true
+}
 op {
   name: "RestoreSlice"
   input_arg {
@@ -22653,6 +22666,20 @@ op {
   description: "The size of `tensor_names` must match the number of tensors in `data`. `data[i]`\nis written to `filename` with name `tensor_names[i]`.\n\nSee also `SaveSlices`."
   is_stateful: true
 }
+op {
+  name: "SaveIterator"
+  input_arg {
+    name: "iterator"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "path"
+    type: DT_STRING
+  }
+  summary: "Saves the state of the `iterator` at `path`."
+  description: "This state can be restored using \"RestoreIterator\"."
+  is_stateful: true
+}
 op {
   name: "SaveSlices"
   input_arg {
-- 
GitLab


From d78a24d407600845879f8825ea3505306f050eca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 01:14:56 -0700
Subject: [PATCH 093/294] Go: Update generated wrapper functions for TensorFlow
 ops.

PiperOrigin-RevId: 166963099
---
 tensorflow/go/op/wrappers.go | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 1b86ed3929..0781347fd6 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -5614,6 +5614,40 @@ func IteratorGetNext(scope *Scope, iterator tf.Output, output_types []tf.DataTyp
 	return components
 }
 
+// Restores the state of the `iterator` from the checkpoint saved at `path` using "SaveIterator".
+//
+// Returns the created operation.
+func RestoreIterator(scope *Scope, iterator tf.Output, path tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "RestoreIterator",
+		Input: []tf.Input{
+			iterator, path,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Saves the state of the `iterator` at `path`.
+//
+// This state can be restored using "RestoreIterator".
+//
+// Returns the created operation.
+func SaveIterator(scope *Scope, iterator tf.Output, path tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "SaveIterator",
+		Input: []tf.Input{
+			iterator, path,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
 // Makes a new iterator from the given `dataset` and stores it in `iterator`.
 //
 // This operation may be executed multiple times. Each execution will reset the
-- 
GitLab


From 96b852627307d9375b2391ef6273abc78a2db5b2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 02:43:21 -0700
Subject: [PATCH 094/294] Don't remove trivial dimensions if they require no
 broadcast.

Currently, the conversion from implicit broadcasts to explicit broadcasts also
removes dimensions which are the same as the output shape. This means that
sometimes potentially costly (on some backends) reshapes are required.
This CL changes the conversion that it will only remove trivial dimensions if
they actually require a broadcast.

PiperOrigin-RevId: 166970167
---
 .../compiler/xla/service/user_computation.cc  |  2 +-
 .../xla/service/user_computation_test.cc      | 59 +++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index cfa5c98f59..297bfd93d1 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -2478,7 +2478,7 @@ HloInstruction* ComputationLowerer::ImplicitBroadcastToExplicitBroadcast(
   std::vector broadcast_dimensions;
   std::vector reshaped_dimensions;
   for (int i = 0; i < ShapeUtil::Rank(operand->shape()); i++) {
-    if (operand->shape().dimensions(i) > 1) {
+    if (operand->shape().dimensions(i) == output_shape.dimensions(i)) {
       broadcast_dimensions.push_back(i);
       reshaped_dimensions.push_back(operand->shape().dimensions(i));
     }
diff --git a/tensorflow/compiler/xla/service/user_computation_test.cc b/tensorflow/compiler/xla/service/user_computation_test.cc
index 7097495929..6b0d6b9e11 100644
--- a/tensorflow/compiler/xla/service/user_computation_test.cc
+++ b/tensorflow/compiler/xla/service/user_computation_test.cc
@@ -197,6 +197,65 @@ TEST_F(UserComputationTest, EliminateScalarBroadcast) {
               operands[1]->opcode() == HloOpcode::kBroadcast);
 }
 
+TEST_F(UserComputationTest, CheckImplicitBroadcastToExplicitBroadcast) {
+  auto debug_options = DebugOptions();
+  debug_options.set_xla_eliminate_hlo_implicit_broadcast(true);
+
+  // Build a binary computation with degenerate broadcast.
+  //
+  //  %a = Param({1, 2, 3});
+  //  %b = Param({1, 2, 1});
+  //  %add = Add(%a, %b, {});
+  ComputationHandle handle;
+  handle.set_handle(123);
+  UserComputation computation("TheComputation", handle);
+
+  ParameterRequest a_request;
+  *a_request.mutable_shape() = ShapeUtil::MakeShape(F32, {1, 2, 3});
+  a_request.set_name("a");
+  a_request.set_parameter(0);
+  TF_ASSERT_OK_AND_ASSIGN(ComputationDataHandle a_handle,
+                          computation.AddParameterInstruction(a_request));
+
+  ParameterRequest b_request;
+  *b_request.mutable_shape() = ShapeUtil::MakeShape(F32, {1, 2, 1});
+  b_request.set_name("b");
+  b_request.set_parameter(1);
+  TF_ASSERT_OK_AND_ASSIGN(ComputationDataHandle b_handle,
+                          computation.AddParameterInstruction(b_request));
+
+  BinaryOpRequest add;
+  add.set_binop(BINOP_ADD);
+  *add.mutable_lhs() = a_handle;
+  *add.mutable_rhs() = b_handle;
+  TF_ASSERT_OK(computation.AddBinaryInstruction(add).status());
+
+  auto hlo_resolver = [](const VersionedComputationHandle& handle) {
+    return nullptr;
+  };
+  VersionedComputationHandle latest_version = computation.GetVersionedHandle();
+
+  // Build the HLO computation.
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr hlo_computation,
+      computation.BuildHloComputation(latest_version.version, hlo_resolver,
+                                      debug_options));
+
+  //    b         a
+  //    |         |
+  // reshape      |
+  //    |         |
+  // broadcast    |
+  //     \       /
+  //        add
+  EXPECT_EQ(5, hlo_computation->instruction_count());
+  EXPECT_THAT(hlo_computation->root_instruction(), op::Add());
+  const auto& operands = hlo_computation->root_instruction()->operands();
+  ASSERT_EQ(2, operands.size());
+  EXPECT_TRUE(operands[0]->opcode() == HloOpcode::kParameter &&
+              operands[1]->opcode() == HloOpcode::kBroadcast);
+}
+
 TEST_F(UserComputationTest, EliminateDegenerateBroadcastAfterIndimBroadcast) {
   auto debug_options = DebugOptions();
   debug_options.set_xla_eliminate_hlo_implicit_broadcast(true);
-- 
GitLab


From f9c5e921dd7058ea517a3d984b2e161d8dd19cee Mon Sep 17 00:00:00 2001
From: Peter Hawkins 
Date: Wed, 30 Aug 2017 08:57:22 -0700
Subject: [PATCH 095/294] [TF:XLA] Implement SqrtGrad.

PiperOrigin-RevId: 167000454
---
 tensorflow/compiler/tests/binary_ops_test.py     |  6 ++++++
 tensorflow/compiler/tests/randomized_tests.cc    | 10 ++++++++++
 tensorflow/compiler/tf2xla/kernels/binary_ops.cc |  4 ++++
 3 files changed, 20 insertions(+)

diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py
index cf7ffc5af1..e349aefd4c 100644
--- a/tensorflow/compiler/tests/binary_ops_test.py
+++ b/tensorflow/compiler/tests/binary_ops_test.py
@@ -94,6 +94,12 @@ class BinaryOpsTest(XLATestCase):
           np.array([5, 6, 7, 8], dtype=dtype),
           expected=np.array([-160, -81, -28, -4], dtype=dtype))
 
+      self._testBinary(
+          gen_math_ops._sqrt_grad,
+          np.array([4, 3, 2, 1], dtype=dtype),
+          np.array([5, 6, 7, 8], dtype=dtype),
+          expected=np.array([0.625, 1, 1.75, 4], dtype=dtype))
+
       self._testBinary(
           gen_nn_ops._softplus_grad,
           np.array([4, 3, 2, 1], dtype=dtype),
diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc
index 6a0bdf1ed1..e380fdd7f4 100644
--- a/tensorflow/compiler/tests/randomized_tests.cc
+++ b/tensorflow/compiler/tests/randomized_tests.cc
@@ -2496,6 +2496,16 @@ TEST_F(OpTest, Sqrt) {
   });
 }
 
+TEST_F(OpTest, SqrtGrad) {
+  Repeatedly([this]() {
+    auto dims = RandomDims();
+    return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("SqrtGrad")
+                                             .RandomInput(DT_FLOAT, dims)
+                                             .RandomInput(DT_FLOAT, dims)
+                                             .Attr("T", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, SquaredDifference) {
   Repeatedly([this]() {
     auto dims = BroadcastableDims();
diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
index ded20a9a3c..f9bb1e2fb1 100644
--- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc
@@ -107,6 +107,10 @@ XLA_MAKE_BINARY(
     b->Mul(b->Pow(lhs, XlaHelpers::IntegerLiteral(b, input_type(0), 3)),
            b->Div(rhs, XlaHelpers::IntegerLiteral(b, input_type(0), -2)),
            extend_dimensions));
+XLA_MAKE_BINARY(SqrtGrad,
+                b->Div(b->Mul(rhs,
+                              XlaHelpers::FloatLiteral(b, input_type(0), 0.5)),
+                       lhs, extend_dimensions));
 
 static xla::ComputationDataHandle Square(xla::ComputationBuilder* builder,
                                          const xla::ComputationDataHandle& x) {
-- 
GitLab


From 48e3b625410d4854296c6ddc8a2b102d25e483ba Mon Sep 17 00:00:00 2001
From: Eugene Brevdo 
Date: Wed, 30 Aug 2017 09:27:00 -0700
Subject: [PATCH 096/294] Delete tf.contrib.training.python_input.

It has been replaced by tf.contrib.data.Dataset.from_generator.

PiperOrigin-RevId: 167004190
---
 RELEASE.md                                    |   3 +
 .../contrib/data/python/ops/dataset_ops.py    |  17 ++
 tensorflow/contrib/training/BUILD             |  18 --
 tensorflow/contrib/training/__init__.py       |   2 -
 .../training/python/training/python_input.py  | 178 ----------------
 .../python/training/python_input_test.py      | 191 ------------------
 6 files changed, 20 insertions(+), 389 deletions(-)
 delete mode 100644 tensorflow/contrib/training/python/training/python_input.py
 delete mode 100644 tensorflow/contrib/training/python/training/python_input_test.py

diff --git a/RELEASE.md b/RELEASE.md
index ec66e555ad..d120f068ca 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -9,6 +9,9 @@
   for LSTMs and stacked LSTMs.  This bug fix follows recommendations from
   published literature, but is a behavioral change.  State dropout behavior
   may be customized via the new `dropout_state_filter_visitor` argument.
+* Removed `tf.contrib.training.python_input`.  The same behavior, in a more
+  flexible and reproducible package, is available via the new
+  `tf.contrib.data.Dataset.from_generator` method!
 
 # Release 1.3.0
 
diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py
index e692822aaf..5b77129a48 100644
--- a/tensorflow/contrib/data/python/ops/dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/dataset_ops.py
@@ -595,6 +595,23 @@ class Dataset(object):
     The elements generated by `generator` must be compatible with the given
     `output_types` and (optional) `output_shapes` arguments.
 
+    For example:
+
+    ```python
+    import itertools
+
+    def gen():
+      for i in itertools.count(1):
+        yield (i, [1] * i)
+
+    ds = Dataset.from_generator(
+        gen, (tf.int64, tf.int64), (tf.TensorShape([]), tf.TensorShape([None])))
+    value = ds.make_one_shot_iterator().get_next()
+
+    sess.run(value)  # (1, array([1]))
+    sess.run(value)  # (2, array([1, 1]))
+    ```
+
     Args:
       generator: A callable object that takes no arguments and returns an
         object that supports the `iter()` protocol.
diff --git a/tensorflow/contrib/training/BUILD b/tensorflow/contrib/training/BUILD
index e8c6c349c8..8e3d869a51 100644
--- a/tensorflow/contrib/training/BUILD
+++ b/tensorflow/contrib/training/BUILD
@@ -23,7 +23,6 @@ py_library(
         "python/training/evaluation.py",
         "python/training/feeding_queue_runner.py",
         "python/training/hparam.py",
-        "python/training/python_input.py",
         "python/training/resample.py",
         "python/training/sampling_ops.py",
         "python/training/sequence_queueing_state_saver.py",
@@ -226,23 +225,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "python_input_test",
-    size = "medium",
-    srcs = ["python/training/python_input_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["manual"],
-    deps = [
-        ":training_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:parsing_ops",
-        "//tensorflow/python:training",
-        "//third_party/py/numpy",
-    ],
-)
-
 py_test(
     name = "evaluation_test",
     size = "small",
diff --git a/tensorflow/contrib/training/__init__.py b/tensorflow/contrib/training/__init__.py
index 87a70e6d16..da2de3e421 100644
--- a/tensorflow/contrib/training/__init__.py
+++ b/tensorflow/contrib/training/__init__.py
@@ -36,7 +36,6 @@ See @{$python/contrib.training} guide.
 @@HParams
 @@HParamDef
 @@parse_values
-@@python_input
 """
 
 from __future__ import absolute_import
@@ -55,7 +54,6 @@ from tensorflow.contrib.training.python.training.evaluation import SummaryAtEndH
 from tensorflow.contrib.training.python.training.evaluation import wait_for_new_checkpoint
 from tensorflow.contrib.training.python.training.feeding_queue_runner import FeedingQueueRunner
 from tensorflow.contrib.training.python.training.hparam import *
-from tensorflow.contrib.training.python.training.python_input import python_input
 from tensorflow.contrib.training.python.training.resample import *
 from tensorflow.contrib.training.python.training.sampling_ops import *
 from tensorflow.contrib.training.python.training.sequence_queueing_state_saver import *
diff --git a/tensorflow/contrib/training/python/training/python_input.py b/tensorflow/contrib/training/python/training/python_input.py
deleted file mode 100644
index 7f5420a98a..0000000000
--- a/tensorflow/contrib/training/python/training/python_input.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Operations for asynchronously reading data from python into queues.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import threading
-
-import numpy as np
-
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import script_ops
-
-
-def _process_yielded_dict(feature_values, keys, features, dtypes, shapes):
-  """Read feature_values from the generator and emit a proper output dict."""
-  if not isinstance(feature_values, dict):
-    raise TypeError("generator must return dict, saw: %s" % feature_values)
-
-  processed_values = {}
-  for pk in keys:
-    if feature_values.get(pk, None) is not None:
-      processed_values[pk] = np.asarray(
-          feature_values[pk], dtype=dtypes[pk].as_numpy_dtype)
-      check_shape = tensor_shape.TensorShape(processed_values[pk].shape)
-      if not shapes[pk].is_compatible_with(check_shape):
-        raise ValueError(
-            "Feature '%s' has shape %s that is incompatible with declared "
-            "shape: %s" % (pk, shapes[pk], check_shape))
-      continue
-    if isinstance(features[pk], parsing_ops.FixedLenFeature):
-      if features[pk].default_value is not None:
-        processed_values[pk] = np.asarray(
-            features[pk].default_value, dtype=dtypes[pk].as_numpy_dtype)
-    elif isinstance(features[pk], parsing_ops.FixedLenSequenceFeature):
-      processed_values[pk] = np.empty(
-          [0] + features[pk].shape.aslist(), dtype=dtypes[pk].as_numpy_dtype)
-    else:
-      raise ValueError(
-          "Expected generator to return key '%s' with non-empty value" % pk)
-
-  return processed_values
-
-
-def python_input(generator, features, name=None):
-  """Easily feed data from a python generator into TensorFlow queues.
-
-  Example usage:
-
-  ```python
-  def generator():
-    for i in range(3):
-      yield {"value": i}
-
-  features = {
-    "value": tf.FixedLenFeature(shape=[], dtype=dtypes.int32)
-  }
-
-  tensor_dict = tf.contrib.training.python_input(generator, features)
-  batched_dict = tf.train.batch(
-    tensor_dict, batch_size=2, allow_smaller_final_batch=True)
-
-  s = tf.Session()
-  tf.train.start_queue_runners()
-
-  batch1 = s.run(batched_dict)  # returns {"value": np.array([0, 1])}
-  batch2 = s.run(batched_dict)  # returns {"value": np.array([2])}
-  s.run(batched_dict)  # error: Queue is closed (generator finished at i==3)
-  ```
-
-  Args:
-    generator: A python generator that takes no arguments, and yields dicts
-      containing a single minibatch entry one at a time.
-    features: A python `dict` mapping keys expected from the generator to
-      instances of `tf.FixedLenFeature`, or `tf.FixedLenSequenceFeature`.
-    name: (Optional) A name for the operations.
-
-  Returns:
-    A dict mapping keys of the `features` dict to `Tensor` objects.
-    These `Tensor` objects are outputs of a queue that is fed by `generator`.
-
-  Raises:
-    TypeError: If generator is not callable or features is not a dict.
-    TypeError: If any of features' values are not a Feature object.
-    NotImplementedError: If any of features' values are instances of
-      `SparseFeature` or `VarLenFeature`  (these are not currently supported).
-    ValueError: If any FixedLenSequenceFeatures contain a default value
-      (this field is not supported).
-    ValueError: if any FixedLenSequenceFeatures have allow_missing=False
-      (this field is not supported).
-  """
-  if not callable(generator):
-    raise TypeError("generator must be callable, saw: %s" % generator)
-  if not isinstance(features, dict):
-    raise TypeError("features must be a dict, saw: %s"
-                    % type(features).__name__)
-
-  with ops.name_scope(name, "python_input"):
-    shapes = {}
-    dtypes = {}
-    for k, v in features.items():
-      if isinstance(v, parsing_ops.FixedLenFeature):
-        if v.default_value is not None:
-          value = ops.convert_to_tensor(v.default_value, dtype=v.dtype, name=k)
-          shapes[k] = value.shape
-          dtypes[k] = value.dtype
-        else:
-          tensor_shape.TensorShape(v.shape).assert_is_fully_defined()
-          shapes[k] = tensor_shape.TensorShape(v.shape)
-          dtypes[k] = v.dtype
-      elif isinstance(v, parsing_ops.VarLenFeature):
-        raise NotImplementedError("VarLenFeature not supported")
-      elif isinstance(v, parsing_ops.SparseFeature):
-        raise NotImplementedError("SparseFeature not supported")
-      elif isinstance(v, parsing_ops.FixedLenSequenceFeature):
-        if v.default_value is not None:
-          raise ValueError("FixedLenSequenceFeature with default value not "
-                           "supported")
-        if not v.allow_missing:
-          raise ValueError("FixedLenSequenceFeature with allow_missing=False "
-                           "not supported")
-        tensor_shape.TensorShape(v.shape).assert_is_fully_defined()
-        shapes[k] = tensor_shape.TensorShape([None]).concatenate(v.shape)
-        dtypes[k] = v.dtype
-      else:
-        raise TypeError(
-            "Expected value for features key '%s' to be one of "
-            "FixedLenFeature, VarLenFeature, SparseFeature, or "
-            "FixedLenSequenceFeature.  Got: %s" % (k, v))
-
-    keys = list(shapes.keys())
-    dtypes_list = [dtypes[pk] for pk in keys]
-
-    counter = [0]
-    lock = threading.Lock()
-    iterator = iter(generator())
-
-    def generator_iter():
-      """Iterate through generator output and return np.arrays to py_func."""
-      with lock:
-        try:
-          feature_values = next(iterator)
-          counter[0] += 1
-        except StopIteration as e:
-          raise StopIteration("Iteration finished.  Processed %d entries (%s)"
-                              % (counter[0], e))
-
-      processed_dict = _process_yielded_dict(
-          feature_values, keys, features, dtypes, shapes)
-      return [processed_dict[pk] for pk in keys]
-
-    generator_pyfunc_values = script_ops.py_func(
-        generator_iter, inp=[], Tout=dtypes_list, stateful=True)
-
-    pyfunc_input = {k: v for (k, v) in zip(keys, generator_pyfunc_values)}
-    for k, v in shapes.items():
-      pyfunc_input[k].set_shape(v)
-
-  return pyfunc_input
-
-
-__all__ = ["python_input"]
diff --git a/tensorflow/contrib/training/python/training/python_input_test.py b/tensorflow/contrib/training/python/training/python_input_test.py
deleted file mode 100644
index afd0f38c2c..0000000000
--- a/tensorflow/contrib/training/python/training/python_input_test.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for tf.contrib.training.python_input."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-from tensorflow.contrib.training.python.training import bucket_ops
-from tensorflow.contrib.training.python.training import python_input
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.ops import parsing_ops
-from tensorflow.python.platform import test
-from tensorflow.python.training import coordinator
-from tensorflow.python.training import input as core_input
-from tensorflow.python.training import queue_runner_impl
-
-
-class PythonInputTest(test.TestCase):
-
-  def testGenerator(self):
-    def simple_generator():
-      for i in range(2):
-        yield {"value": i, "ignored": 3}
-
-    simple_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
-    }
-    tensors = python_input.python_input(simple_generator, simple_features)
-    self.assertEqual(["value"], tensors.keys())
-    self.assertEqual(dtypes.int32, tensors["value"].dtype)
-    self.assertEqual((), tensors["value"].shape)
-
-    with self.test_session() as sess:
-      self.assertEqual({"value": 0}, sess.run(tensors))
-      self.assertEqual({"value": 1}, sess.run(tensors))
-      with self.assertRaisesOpError("Iteration finished"):
-        sess.run(tensors)
-
-  def testInvalidGenerator(self):
-    generator1 = lambda: iter([{"value": "a"}])
-    int_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
-    }
-    tensors1 = python_input.python_input(generator1, int_features)
-
-    with self.test_session() as sess:
-      with self.assertRaisesOpError("invalid literal"):
-        # Can't convert a string to an integer
-        sess.run(tensors1)
-
-    generator2 = lambda: iter([None])
-    tensors2 = python_input.python_input(generator2, int_features)
-
-    with self.test_session() as sess:
-      with self.assertRaisesOpError("generator must return dict"):
-        sess.run(tensors2)
-
-    generator3 = lambda: iter([{"value": [1, 2]}])
-    tensors3 = python_input.python_input(generator3, int_features)
-
-    with self.test_session() as sess:
-      with self.assertRaisesOpError("incompatible with declared shape"):
-        sess.run(tensors3)
-
-  def testGeneratorWorksWithBatching(self):
-    def simple_generator():
-      for i in range(5):
-        yield {"value": i, "ignored": 3}
-
-    simple_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
-    }
-    tensors = python_input.python_input(simple_generator, simple_features)
-
-    # Request batches of size 4 at a time, the final batch may be smaller.
-    batched_tensors = core_input.batch(tensors, batch_size=4,
-                                       allow_smaller_final_batch=True)
-
-    self.assertEqual(["value"], batched_tensors.keys())
-    self.assertEqual(dtypes.int32, batched_tensors["value"].dtype)
-    self.assertEqual([None], batched_tensors["value"].shape.as_list())
-
-    with self.test_session() as sess:
-      # The generator emits 5 items total.  The first 4 are returned in
-      # the first session run; the final one is returned in the
-      # second.  This works because allow_smaller_final_batch=True.
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-      r1 = sess.run(batched_tensors)
-      r2 = sess.run(batched_tensors)
-      self.assertAllEqual([0, 1, 2, 3], r1["value"])
-      self.assertEqual([4], r2["value"])
-      with self.assertRaisesOpError("Iteration finished"):
-        sess.run(tensors)
-      coord.request_stop()
-      for thread in threads:
-        thread.join()
-
-  def testGeneratorWorksWithManyBatchingThreads(self):
-    def simple_generator():
-      for i in range(5000):
-        yield {"value": i, "ignored": 3}
-
-    simple_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32)
-    }
-    tensors = python_input.python_input(simple_generator, simple_features)
-
-    # Request batches of size 20 at a time, the final batch may be smaller.
-    _, batched_tensors = bucket_ops.bucket(
-        tensors, which_bucket=tensors["value"] % 5,
-        batch_size=20, num_buckets=5, num_threads=7, capacity=17,
-        allow_smaller_final_batch=True)
-
-    self.assertEqual(["value"], batched_tensors.keys())
-    self.assertEqual(dtypes.int32, batched_tensors["value"].dtype)
-    self.assertEqual([None], batched_tensors["value"].shape.as_list())
-
-    with self.test_session() as sess:
-      # The generator emits 5 items total.  The first 4 are returned in
-      # the first session run; the final one is returned in the
-      # second.  This works because allow_smaller_final_batch=True.
-      coord = coordinator.Coordinator()
-      threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord)
-      results = []
-      while True:
-        try:
-          r = sess.run(batched_tensors)
-          results.extend(r["value"].tolist())
-        except errors.OutOfRangeError:
-          break
-      coord.request_stop()
-      for thread in threads:
-        thread.join()
-    self.assertEqual(sorted(results),
-                     list(range(5000)))
-
-  def testVaryingFieldsInGenerator(self):
-    def simple_generator():
-      for i in range(2):
-        yield {"value": i,
-               "seqlen_value": np.ones((i, 1))}
-
-    simple_features = {
-        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32),
-        "seqlen_value": parsing_ops.FixedLenSequenceFeature(
-            shape=[1], dtype=dtypes.float32, allow_missing=True),
-        "empty_value": parsing_ops.FixedLenFeature(
-            default_value=[-1, -2], dtype=dtypes.int32, shape=[2])
-    }
-    tensors = python_input.python_input(simple_generator, simple_features)
-    self.assertEqual(
-        set(["value", "seqlen_value", "empty_value"]), set(tensors.keys()))
-    self.assertEqual(dtypes.int32, tensors["value"].dtype)
-    self.assertEqual((), tensors["value"].shape)
-    self.assertEqual(dtypes.float32, tensors["seqlen_value"].dtype)
-    self.assertEqual([None, 1], tensors["seqlen_value"].shape.as_list())
-    self.assertEqual(dtypes.int32, tensors["empty_value"].dtype)
-    self.assertEqual([2], tensors["empty_value"].shape)
-
-    with self.test_session() as sess:
-      r1 = sess.run(tensors)
-      self.assertAllEqual(0, r1["value"])
-      self.assertAllEqual(np.ones((0, 1)), r1["seqlen_value"])
-      self.assertAllEqual([-1, -2], r1["empty_value"])
-
-      r2 = sess.run(tensors)
-      self.assertAllEqual(1, r2["value"])
-      self.assertAllEqual([[1]], r2["seqlen_value"])
-      self.assertAllEqual([-1, -2], r2["empty_value"])
-
-      with self.assertRaisesOpError("Iteration finished"):
-        sess.run(tensors)
-
-
-if __name__ == "__main__":
-  test.main()
-- 
GitLab


From adf6f98b6bea7a3b7d32d15450ba594fa3cb8038 Mon Sep 17 00:00:00 2001
From: Shanqing Cai 
Date: Wed, 30 Aug 2017 10:55:29 -0700
Subject: [PATCH 097/294] Remove duplicate line in gcs_smoke.sh

PiperOrigin-RevId: 167017337
---
 tensorflow/tools/gcs_test/gcs_smoke.sh | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/tools/gcs_test/gcs_smoke.sh b/tensorflow/tools/gcs_test/gcs_smoke.sh
index ec7ee4fbb0..69c632f2cf 100755
--- a/tensorflow/tools/gcs_test/gcs_smoke.sh
+++ b/tensorflow/tools/gcs_test/gcs_smoke.sh
@@ -65,10 +65,6 @@ echo "Building in temporary directory: ${BUILD_DIR}"
 cp -r ${SCRIPT_DIR}/* "${BUILD_DIR}"/ || \
     die "Failed to copy files to ${BUILD_DIR}"
 
-# Download whl file into the build context directory.
-wget -P "${BUILD_DIR}" ${WHL_URL} || \
-    die "Failed to download tensorflow whl file from URL: ${WHL_URL}"
-
 DOCKERFILE="${BUILD_DIR}/Dockerfile"
 if [[ ! -f "${DOCKERFILE}" ]]; then
   die "ERROR: Cannot find Dockerfile at expected path ${DOCKERFILE}"
-- 
GitLab


From f0cf9811746aec93d37a73a67dafba7b57819c53 Mon Sep 17 00:00:00 2001
From: Vivek Rane 
Date: Wed, 30 Aug 2017 11:06:19 -0700
Subject: [PATCH 098/294] Added TODO for adding an implementation for
 Element-Wise ops that doesn't depend on Eigen.

---
 tensorflow/core/graph/mkl_layout_pass.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index d35fb82d77..3a8f9ea537 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1994,6 +1994,9 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
   // incoming edges are in TF format, we don't need all this overhead, so
   // replace the elementwise node only if at least one of its parents is a MKL
   // node.
+  //
+  // TODO(vrane): Add implementation for element-wise ops that doesn't reuse
+  // eigen code to reduce cross-library dependency.
   if (mkl_op_registry::IsMklElementWiseOp(
           mkl_op_registry::GetMklOpName(n->type_string()), T)) {
     bool incoming_mkl_edge = false;
-- 
GitLab


From 001062b4aaf5adf98c97462c64f4bc8642ea4a7c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 11:06:37 -0700
Subject: [PATCH 099/294] Add a few more eager test coverage.

PiperOrigin-RevId: 167019319
---
 tensorflow/python/ops/math_ops_test.py | 76 +++++++++++++++++---------
 1 file changed, 49 insertions(+), 27 deletions(-)

diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index f1103f209c..4642f4c580 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -26,6 +27,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 
@@ -37,23 +39,32 @@ log = np.log
 
 class ReduceTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testReduceAllDims(self):
     x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)
-    with self.test_session(use_gpu=True):
-      y_tf = math_ops.reduce_sum(x).eval()
+    with test_util.device(use_gpu=True):
+      y_tf = self.evaluate(math_ops.reduce_sum(x))
       self.assertEqual(y_tf, 21)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testReduceExplicitAxes(self):
     x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)
-    with self.test_session(use_gpu=True):
+    with test_util.device(use_gpu=True):
       for axis in (0, -2, (0, 0), (0, -2)):
-        self.assertAllEqual(math_ops.reduce_sum(x, axis=axis).eval(), [5, 7, 9])
+        self.assertAllEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)),
+                            [5, 7, 9])
       for axis in (1, -1, (1, 1), (1, -1)):
-        self.assertAllEqual(math_ops.reduce_sum(x, axis=axis).eval(), [6, 15])
+        self.assertAllEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)),
+                            [6, 15])
       for axis in (None, (0, 1), (-1, -2), (-2, -1, 0, 1)):
-        self.assertEqual(math_ops.reduce_sum(x, axis=axis).eval(), 21)
+        self.assertEqual(self.evaluate(math_ops.reduce_sum(x, axis=axis)), 21)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testReduceInvalidAxis(self):
+    if context.in_eager_mode():
+      # The shape check is in run a graph contruction time. In eager mode,
+      # it misses the check, magically return result given wrong shape.
+      return
     x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)
     axis = np.array([[0], [1]])
     with self.assertRaisesRegexp(errors.InvalidArgumentError,
@@ -142,16 +153,17 @@ class LogSumExpTest(test_util.TensorFlowTestCase):
 
 class RoundTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testRounding(self):
     x = [0.49, 0.7, -0.3, -0.8]
     # TODO(nolivia): Remove this when RoundOp is forwards compatible
     # x = np.arange(-5.0, 5.0, .25)
     for dtype in [np.float32, np.double, np.int32]:
       x_np = np.array(x, dtype=dtype)
-      with self.test_session(use_gpu=True):
+      with test_util.device(use_gpu=True):
         x_tf = constant_op.constant(x_np, shape=x_np.shape)
         y_tf = math_ops.round(x_tf)
-        y_tf_np = y_tf.eval()
+        y_tf_np = self.evaluate(y_tf)
         y_np = np.round(x_np)
         self.assertAllClose(y_tf_np, y_np, atol=1e-2)
 
@@ -187,77 +199,87 @@ class ModTest(test_util.TensorFlowTestCase):
 
 class SquaredDifferenceTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testSquaredDifference(self):
     for dtype in [np.int32, np.float16]:
       x = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype)
       y = np.array([-3, -2, -1], dtype=dtype)
       z = (x - y) * (x - y)
-      with self.test_session(use_gpu=True):
-        z_tf = math_ops.squared_difference(x, y).eval()
+      with test_util.device(use_gpu=True):
+        z_tf = self.evaluate(math_ops.squared_difference(x, y))
         self.assertAllClose(z, z_tf)
 
 
 class ApproximateEqualTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testApproximateEqual(self):
     for dtype in [np.float32, np.double]:
       x = dtype(1)
       y = dtype(1.00009)
       z = False
-      with self.test_session(use_gpu=True):
+      with test_util.device(use_gpu=True):
         # Default tolerance is 0.00001
-        z_tf = math_ops.approximate_equal(x, y).eval()
+        z_tf = self.evaluate(math_ops.approximate_equal(x, y))
         self.assertAllEqual(z, z_tf)
 
     for dtype in [np.float32, np.double]:
       x = dtype(1)
       y = dtype(1.000009)
       z = True
-      with self.test_session(use_gpu=True):
+      with test_util.device(use_gpu=True):
         # Default tolerance is 0.00001
-        z_tf = math_ops.approximate_equal(x, y).eval()
+        z_tf = self.evaluate(math_ops.approximate_equal(x, y))
         self.assertAllEqual(z, z_tf)
 
     for dtype in [np.float32, np.double]:
       x = np.array([[[[-1, 2.00009999], [-3, 4.01]]]], dtype=dtype)
       y = np.array([[[[-1.001, 2], [-3.00009, 4]]]], dtype=dtype)
       z = np.array([[[[False, True], [True, False]]]], dtype=np.bool)
-      with self.test_session(use_gpu=True):
-        z_tf = math_ops.approximate_equal(x, y, tolerance=0.0001).eval()
+      with test_util.device(use_gpu=True):
+        z_tf = self.evaluate(math_ops.approximate_equal(x, y, tolerance=0.0001))
         self.assertAllEqual(z, z_tf)
 
 
 class ScalarMulTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testAcceptsRefs(self):
-    var = variables.Variable(10)
+    if context.in_eager_mode():
+      var = resource_variable_ops.ResourceVariable(10, name="var")
+    else:
+      var = variables.Variable(10)
     result = math_ops.scalar_mul(3, var)
     init = variables.global_variables_initializer()
-    with self.test_session(use_gpu=True) as sess:
-      sess.run(init)
-      self.assertEqual(30, result.eval())
+    with test_util.device(use_gpu=True):
+      self.evaluate(init)
+      self.assertEqual(30, self.evaluate(result))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testAcceptsConstant(self):
     const = constant_op.constant(10)
     result = math_ops.scalar_mul(3, const)
-    with self.test_session(use_gpu=True):
-      self.assertEqual(30, result.eval())
+    with test_util.device(use_gpu=True):
+      self.assertEqual(30, self.evaluate(result))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testAcceptsTensor(self):
     tensor = array_ops.ones([10, 10])
     result = math_ops.scalar_mul(3, tensor)
     expected = array_ops.ones([10, 10]) * 3
 
-    with self.test_session(use_gpu=True):
-      self.assertAllEqual(expected.eval(), result.eval())
+    with test_util.device(use_gpu=True):
+      self.assertAllEqual(self.evaluate(expected), self.evaluate(result))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testAcceptsIndexedSlices(self):
     values = constant_op.constant([2, 3, 5, 7, 0, -1], shape=[3, 2])
     indices = constant_op.constant([0, 2, 5])
     x = math_ops.scalar_mul(-3, ops.IndexedSlices(values, indices))
-    with self.test_session(use_gpu=True):
-      self.assertAllEqual(x.values.eval(), [[-6, -9], [-15, -21], [0, 3]])
-      self.assertAllEqual(x.indices.eval(), [0, 2, 5])
+    with test_util.device(use_gpu=True):
+      self.assertAllEqual(self.evaluate(x.values),
+                          [[-6, -9], [-15, -21], [0, 3]])
+      self.assertAllEqual(self.evaluate(x.indices), [0, 2, 5])
 
 
 class AccumulateNTest(test_util.TensorFlowTestCase):
-- 
GitLab


From d733820a1aa02631cb108842a45f33fb2b87e72a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 11:10:17 -0700
Subject: [PATCH 100/294] Make core layers EAGER model friendly.

PiperOrigin-RevId: 167019943
---
 tensorflow/python/BUILD               |   7 +
 tensorflow/python/layers/base.py      | 251 +++++++++++++++-----
 tensorflow/python/layers/base_test.py | 330 +++++++++++++++-----------
 tensorflow/python/layers/core.py      |   9 +-
 tensorflow/python/layers/core_test.py | 130 ++++++----
 tensorflow/python/ops/nn_ops.py       |  35 +--
 6 files changed, 484 insertions(+), 278 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 89f0ab48a0..6597889fbc 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1774,6 +1774,7 @@ py_library(
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:custom_gradient",
+        "//tensorflow/python/eager:tape",
         "//tensorflow/python/eager:tensor",
     ],
 )
@@ -3734,6 +3735,7 @@ py_library(
         ":util",
         ":variable_scope",
         ":variables",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
@@ -3748,6 +3750,7 @@ py_test(
     deps = [
         ":client_testlib",
         ":framework_for_generated_wrappers",
+        ":framework_test_lib",
         ":init_ops",
         ":layers",
         ":math_ops",
@@ -3766,6 +3769,7 @@ py_test(
         ":array_ops",
         ":client_testlib",
         ":framework_for_generated_wrappers",
+        ":framework_test_lib",
         ":layers",
         ":math_ops",
         ":nn_ops",
@@ -3785,6 +3789,7 @@ py_test(
     deps = [
         ":client_testlib",
         ":framework_for_generated_wrappers",
+        ":framework_test_lib",
         ":layers",
         ":math_ops",
         ":nn_ops",
@@ -3812,6 +3817,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":client_testlib",
+        ":framework_test_lib",
         ":layers",
         ":random_ops",
     ],
@@ -3825,6 +3831,7 @@ cuda_py_test(
         ":array_ops",
         ":client_testlib",
         ":framework_for_generated_wrappers",
+        ":framework_test_lib",
         ":layers",
         ":math_ops",
         ":random_ops",
diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index 5f21a2bdfa..ab5557672f 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -33,6 +33,7 @@ from six.moves import xrange  # pylint: disable=redefined-builtin
 import numpy as np
 import six
 
+from tensorflow.python.eager import context
 from tensorflow.python.estimator import util as estimator_util
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import dtypes
@@ -102,6 +103,8 @@ class Layer(object):
     self._per_input_updates = {}
     self.dtype = dtypes.as_dtype(dtype).name
     self.input_spec = None
+    self._compute_previous_mask = ('mask' in estimator_util.fn_args(self.call)
+                                   or hasattr(self, 'compute_mask'))
 
     # These lists will be filled via successive calls
     # to self._add_inbound_node().
@@ -181,6 +184,8 @@ class Layer(object):
 
   @property
   def updates(self):
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.updates not supported in Eager mode.')
     return self._updates
 
   def add_update(self, updates, inputs=None):
@@ -202,7 +207,12 @@ class Layer(object):
         match the `inputs` argument passed to the `__call__` method at the time
         the updates are created. If `None` is passed, the updates are assumed
         to be unconditional, and will apply across all dataflows of the layer.
+
+    Raises:
+      RuntimeError: If called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.add_update not supported in Eager mode.')
     updates = _to_list(updates)
     if not updates:
       return
@@ -232,7 +242,12 @@ class Layer(object):
 
     Returns:
       List of update ops of the layer that depend on `inputs`.
+
+    Raises:
+      RuntimeError: If called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.get_updates_for not supported in Eager mode.')
     if inputs is not None:
       inputs = _to_list(inputs)
     if not inputs:
@@ -245,6 +260,8 @@ class Layer(object):
 
   @property
   def losses(self):
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.losses not supported in Eager mode.')
     return self._losses
 
   def add_loss(self, losses, inputs=None):
@@ -266,7 +283,12 @@ class Layer(object):
         the losses are created. If `None` is passed, the losses are assumed
         to be unconditional, and will apply across all dataflows of the layer
         (e.g. weight regularization losses).
+
+    Raises:
+      RuntimeError: If called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.add_loss not supported in Eager mode.')
     losses = _to_list(losses)
     if not losses:
       return
@@ -297,7 +319,12 @@ class Layer(object):
 
     Returns:
       List of loss tensors of the layer that depend on `inputs`.
+
+    Raises:
+      RuntimeError: If called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.get_losses_for not supported in Eager mode.')
     if inputs is not None:
       inputs = _to_list(inputs)
     if not inputs:
@@ -376,15 +403,24 @@ class Layer(object):
 
     Returns:
       The created variable.
+
+    Raises:
+      RuntimeError: If called in Eager mode with regularizers.
     """
+    # Note that we currently don't support variable regularization in Eager
+    # mode. An alternative is for users to directly compute these losses before
+    # performing a backward pass.
+    if regularizer is not None and context.in_eager_mode():
+      raise RuntimeError('Variable regularization not supported in Eager mode.')
     if dtype is None:
       dtype = self.dtype
     existing_variables = set(tf_variables.global_variables())
 
     self._set_scope(None)
 
-    with vs.variable_scope(self._scope,
-                           reuse=self.built or self._reuse) as scope:
+    vs_reuse = ((self.built or self._reuse)
+                if context.in_graph_mode() else vs.AUTO_REUSE)
+    with vs.variable_scope(self._scope, reuse=vs_reuse) as scope:
       with ops.name_scope(scope.original_name_scope):
         variable = vs.get_variable(name,
                                    shape=shape,
@@ -446,29 +482,48 @@ class Layer(object):
     """
     self._set_scope(kwargs.pop('scope', None))
 
+    in_graph_mode = context.in_graph_mode()
     # Ensure the Layer, if being reused, is working with inputs from
     # the same graph as where it was created.
-    try:
-      ops._get_graph_from_inputs(nest.flatten(inputs), graph=self.graph)  # pylint: disable=protected-access
-    except ValueError as e:
-      raise ValueError('Input graph and Layer graph are not the same: %s' % e)
+    if in_graph_mode:
+      try:
+        ops._get_graph_from_inputs(nest.flatten(inputs), graph=self.graph)  # pylint: disable=protected-access
+      except ValueError as e:
+        raise ValueError('Input graph and Layer graph are not the same: %s' % e)
+      user_kwargs = copy.copy(kwargs)
 
     # Handle Keras mask propagation from previous layer to current layer.
-    previous_mask = _collect_previous_mask(inputs)
-    user_kwargs = copy.copy(kwargs)
-    if not _is_all_none(previous_mask):
-      # The previous layer generated a mask, which we should use as default
-      # for any "mask" argument in `call`.
-      if 'mask' in estimator_util.fn_args(self.call):
-        if 'mask' not in kwargs:
-          # If mask is explicitly passed to __call__,
-          # we should override the default mask.
-          kwargs['mask'] = previous_mask
-
-    with vs.variable_scope(self._scope,
-                           reuse=self.built or self._reuse) as scope:
+    previous_mask = None
+    if (not hasattr(self, '_compute_previous_mask') or
+        self._compute_previous_mask):
+      previous_mask = _collect_previous_mask(inputs)
+      if ('mask' in estimator_util.fn_args(self.call) and
+          'mask' not in kwargs and
+          not _is_all_none(previous_mask)):
+        # The previous layer generated a mask, and mask was not explicitly pass
+        # to __call__, hence we set previous_mask as the default value.
+        kwargs['mask'] = previous_mask
+
+    vs_reuse = ((self.built or self._reuse)
+                if context.in_graph_mode else vs.AUTO_REUSE)
+    with vs.variable_scope(self._scope, reuse=vs_reuse) as scope:
       with ops.name_scope(scope.original_name_scope):
         if not self.built:
+          if not in_graph_mode:
+            # Activity regularization is unsupported in Eager mode.
+            if hasattr(self,
+                       'activity_regularizer') and self.activity_regularizer:
+              raise ValueError('activity_regularizer currently unsupported in '
+                               'Eager mode. Found an activity_regularizer in '
+                               '%s(%s).' % (self.__class__.__name__, self))
+            # TODO(agarwal): support _keras_history in Eager mode.
+            for x in _to_list(inputs):
+              if hasattr(x, '_keras_history'):
+                raise ValueError('_keras_history currently unsupported in '
+                                 'Eager mode. Found _keras_history in %s while '
+                                 'executing __call__ for %s(%s)' %
+                                 (x, self.__class_.__name__, self))
+
           # Check input assumptions set before layer building, e.g. input rank.
           self._assert_input_compatibility(inputs)
           input_list = nest.flatten(inputs)
@@ -480,24 +535,27 @@ class Layer(object):
         if 'scope' in estimator_util.fn_args(self.call):
           kwargs['scope'] = scope
         # Check input assumptions set after layer building, e.g. input shape.
-        self._assert_input_compatibility(inputs)
+        if in_graph_mode:
+          self._assert_input_compatibility(inputs)
         outputs = self.call(inputs, *args, **kwargs)
 
         if outputs is None:
           raise ValueError('A layer\'s `call` method should return a Tensor '
                            'or a list of Tensors, not None.')
 
-        # Apply activity regularization.
-        # Note that it should be applied every time the layer creates a new
-        # output, since it is output-specific.
-        if hasattr(self, 'activity_regularizer') and self.activity_regularizer:
-          output_list = _to_list(outputs)
-          for output in output_list:
-            with ops.name_scope('ActivityRegularizer'):
-              activity_regularization = self.activity_regularizer(output)
-            self.add_loss(activity_regularization)
-            _add_elements_to_collection(
-                activity_regularization, ops.GraphKeys.REGULARIZATION_LOSSES)
+        if in_graph_mode:
+          # Apply activity regularization.
+          # Note that it should be applied every time the layer creates a new
+          # output, since it is output-specific.
+          if hasattr(self,
+                     'activity_regularizer') and self.activity_regularizer:
+            output_list = _to_list(outputs)
+            for output in output_list:
+              with ops.name_scope('ActivityRegularizer'):
+                activity_regularization = self.activity_regularizer(output)
+              self.add_loss(activity_regularization)
+              _add_elements_to_collection(activity_regularization,
+                                          ops.GraphKeys.REGULARIZATION_LOSSES)
 
         # Handle mask computation and propagation to the next layer.
         if hasattr(self, 'compute_mask'):
@@ -510,40 +568,42 @@ class Layer(object):
           else:
             outputs._keras_mask = output_mask  # pylint: disable=protected-access
 
-    # If all input tensors have history metadata,
-    # we update the output tensors
-    # with corresponding history metadata, thus eventually allowing to use
-    # these tensors to instantiate a Network.
-    if _have_all_keras_metadata(inputs):
-      # If the layer returns tensors from its inputs, unmodified,
-      # we copy them to avoid loss of tensor metadata.
-      output_ls = _to_list(outputs)
-      inputs_ls = _to_list(inputs)
-      output_ls_copy = []
-      for x in output_ls:
-        if x in inputs_ls:
-          with ops.name_scope(scope.original_name_scope):
-            x = array_ops.identity(x)
-        output_ls_copy.append(x)
-      if len(output_ls_copy) == 1:
-        outputs = output_ls_copy[0]
-      else:
-        outputs = output_ls_copy
+    if in_graph_mode:
+      # If all input tensors have history metadata,
+      # we update the output tensors
+      # with corresponding history metadata, thus eventually allowing to use
+      # these tensors to instantiate a Network.
+      if _have_all_keras_metadata(inputs):
+        # If the layer returns tensors from its inputs, unmodified,
+        # we copy them to avoid loss of tensor metadata.
+        output_ls = _to_list(outputs)
+        inputs_ls = _to_list(inputs)
+        output_ls_copy = []
+        for x in output_ls:
+          if x in inputs_ls:
+            with ops.name_scope(scope.original_name_scope):
+              x = array_ops.identity(x)
+          output_ls_copy.append(x)
+        if len(output_ls_copy) == 1:
+          outputs = output_ls_copy[0]
+        else:
+          outputs = output_ls_copy
 
-      # Add an inbound node to the layer, so it can keep track of this call.
-      # This updates the layer history of the output tensor(s).
-      self._add_inbound_node(
-          input_tensors=inputs,
-          output_tensors=outputs,
-          arguments=user_kwargs)
+        # Add an inbound node to the layer, so it can keep track of this call.
+        # This updates the layer history of the output tensor(s).
+        self._add_inbound_node(
+            input_tensors=inputs, output_tensors=outputs, arguments=user_kwargs)
+
+      # Update global default collections.
+      _add_elements_to_collection(self.updates, ops.GraphKeys.UPDATE_OPS)
 
-    # Update global default collections.
-    _add_elements_to_collection(self.updates, ops.GraphKeys.UPDATE_OPS)
     self.built = True
     return outputs
 
   @property
   def graph(self):
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.graph not supported in Eager mode.')
     return self._graph
 
   def __deepcopy__(self, memo):
@@ -590,6 +650,7 @@ class Layer(object):
         arguments: dictionary of keyword arguments that were passed to the
             `call` method of the layer at the call that created the node.
     """
+    assert context.in_graph_mode()
     input_tensors = _to_list(input_tensors)
     output_tensors = _to_list(output_tensors)
 
@@ -642,9 +703,11 @@ class Layer(object):
         The layer's attribute `attr` at the node of index `node_index`.
 
     Raises:
-        RuntimeError: If the layer has no inbound nodes.
+        RuntimeError: If the layer has no inbound nodes, or if called in Eager
+        mode.
         ValueError: If the index provided does not match any node.
     """
+    assert context.in_graph_mode()
     if not self.inbound_nodes:
       raise RuntimeError('The layer has never been called '
                          'and thus has no defined ' + attr_name + '.')
@@ -670,7 +733,13 @@ class Layer(object):
     Returns:
         A shape tuple
         (or list of shape tuples if the layer has multiple inputs).
+
+    Raises:
+      RuntimeError: If called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError(
+          'Layer.get_input_shape_at not supported in Eager mode.')
     return self._get_node_attribute_at_index(node_index, 'input_shapes',
                                              'input shape')
 
@@ -686,7 +755,13 @@ class Layer(object):
     Returns:
         A shape tuple
         (or list of shape tuples if the layer has multiple outputs).
+
+    Raises:
+      RuntimeError: If called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError(
+          'Layer.get_output_shape_at not supported in Eager mode.')
     return self._get_node_attribute_at_index(node_index, 'output_shapes',
                                              'output shape')
 
@@ -701,7 +776,12 @@ class Layer(object):
 
     Returns:
         A tensor (or list of tensors if the layer has multiple inputs).
+
+    Raises:
+      RuntimeError: If called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.get_input_at not supported in Eager mode.')
     return self._get_node_attribute_at_index(node_index, 'input_tensors',
                                              'input')
 
@@ -716,7 +796,12 @@ class Layer(object):
 
     Returns:
         A tensor (or list of tensors if the layer has multiple outputs).
+
+    Raises:
+      RuntimeError: If called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.get_output_at not supported in Eager mode.')
     return self._get_node_attribute_at_index(node_index, 'output_tensors',
                                              'output')
 
@@ -733,7 +818,13 @@ class Layer(object):
     Raises:
         AttributeError: if the layer is connected to
         more than one incoming layers.
+
+    Raises:
+      RuntimeError: If called in Eager mode.
+      AttributeError: If no inbound nodes are found.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.input not supported in Eager mode.')
     if not self.inbound_nodes:
       raise AttributeError('Layer ' + self.name +
                            ' is not connected, no input to return.')
@@ -747,12 +838,15 @@ class Layer(object):
     i.e. if it is connected to one incoming layer.
 
     Returns:
-        Output tensor or list of output tensors.
+      Output tensor or list of output tensors.
 
     Raises:
-        AttributeError: if the layer is connected to
-        more than one incoming layers.
+      AttributeError: if the layer is connected to more than one incoming
+        layers.
+      RuntimeError: if called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.output not supported in Eager mode.')
     if not self.inbound_nodes:
       raise AttributeError('Layer ' + self.name + ' has no inbound nodes.')
     return self._get_node_attribute_at_index(0, 'output_tensors', 'output')
@@ -771,7 +865,10 @@ class Layer(object):
 
     Raises:
         AttributeError: if the layer has no defined input_shape.
+        RuntimeError: if called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.input_shape not supported in Eager mode.')
     if not self.inbound_nodes:
       raise AttributeError('The layer has never been called '
                            'and thus has no defined input shape.')
@@ -829,7 +926,10 @@ class Layer(object):
 
     Raises:
         AttributeError: if the layer has no defined output shape.
+        RuntimeError: if called in Eager mode.
     """
+    if context.in_eager_mode():
+      raise RuntimeError('Layer.output_shape not supported in Eager mode.')
     if not self.inbound_nodes:
       raise AttributeError('The layer has never been called '
                            'and thus has no defined output shape.')
@@ -897,8 +997,8 @@ class Layer(object):
         if ndim != spec.ndim:
           raise ValueError('Input ' + str(input_index) + ' of layer ' +
                            self.name + ' is incompatible with the layer: '
-                           'expected ndim=' + str(spec.ndim) + ', found ndim='
-                           + str(ndim) + '. Full shape received: ' +
+                           'expected ndim=' + str(spec.ndim) + ', found ndim=' +
+                           str(ndim) + '. Full shape received: ' +
                            str(x.get_shape().as_list()))
       if spec.max_ndim is not None:
         ndim = x.get_shape().ndims
@@ -1115,6 +1215,9 @@ class InputLayer(Layer):
       sparse: Boolean, whether the placeholder created
           is meant to be sparse.
       name: Name of the layer (string).
+
+    Raises:
+      RuntimeError: If created in Eager mode.
   """
 
   def __init__(self,
@@ -1124,6 +1227,8 @@ class InputLayer(Layer):
                input_tensor=None,
                sparse=False,
                name=None):
+    if context.in_eager_mode():
+      raise RuntimeError('InputLayer not supported in Eager mode.')
     super(InputLayer, self).__init__(dtype=dtype, name=name)
     self.built = True
     self.sparse = sparse
@@ -1210,7 +1315,12 @@ def Input(  # pylint: disable=invalid-name
   Returns:
       A tensor: either a new placeholder (with history metadata) or
       `tensor` (if passed), with added history metadata.
+
+  Raises:
+    RuntimeError: If called in Eager mode.
   """
+  if context.in_eager_mode():
+    raise RuntimeError('Input not supported in Eager mode.')
   input_layer = InputLayer(
       input_shape=shape,
       batch_size=batch_size,
@@ -1268,9 +1378,15 @@ class Network(Layer):
   Methods:
     Network has the same methods as Layer. On top of it, it also has:
       - get_layer: retrieves a child layer by name or index in the graph.
+
+  Raises:
+    RuntimeError: If created in Eager mode.
   """
 
   def __init__(self, inputs, outputs, name=None):  # pylint: disable=super-init-not-called
+    # TODO(agarwal): Make Network work in Eager mode.
+    if context.in_eager_mode():
+      raise RuntimeError('Network not supported in Eager mode.')
     # Set layer name and scope
     if isinstance(name, vs.VariableScope):
       base_name = name.name
@@ -1282,6 +1398,8 @@ class Network(Layer):
       self.name = _unique_layer_name(base_name)
     self._scope = next(vs.variable_scope(None, default_name=base_name).gen)
     self._base_name = base_name
+    self._compute_previous_mask = ('mask' in estimator_util.fn_args(self.call)
+                                   or hasattr(self, 'compute_mask'))
 
     # This acts just like the `trainable` attribute of any layer instance.
     # It does not affect users of the underlying layers, only users of the
@@ -2033,6 +2151,10 @@ def _to_list(x):
 
 
 def _add_elements_to_collection(elements, collection_list):
+  if context.in_eager_mode():
+    raise RuntimeError('Using collections from Layers not supported in Eager '
+                       'mode. Tried to add %s to %s' % (elements,
+                                                        collection_list))
   elements = _to_list(elements)
   collection_list = _to_list(collection_list)
   for name in collection_list:
@@ -2102,6 +2224,7 @@ def _collect_previous_mask(input_tensors):
     return masks[0]
   return masks
 
+
 # A global dictionary mapping graph objects to an index of counters used
 # for various layer names in each graph.
 # Allows to give unique autogenerated names to layers, in a graph-specific way.
diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py
index a1af153e55..ced5466beb 100644
--- a/tensorflow/python/layers/base_test.py
+++ b/tensorflow/python/layers/base_test.py
@@ -20,7 +20,11 @@ from __future__ import print_function
 
 import copy
 
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.layers import base as base_layers
 from tensorflow.python.layers import core as core_layers
 from tensorflow.python.ops import array_ops
@@ -34,45 +38,48 @@ from tensorflow.python.platform import test
 
 class BaseLayerTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testLayerProperties(self):
     layer = base_layers.Layer(name='my_layer')
     self.assertListEqual(layer.variables, [])
     self.assertListEqual(layer.trainable_variables, [])
     self.assertListEqual(layer.non_trainable_variables, [])
-    self.assertListEqual(layer.updates, [])
-    self.assertListEqual(layer.losses, [])
+    if context.in_graph_mode():
+      # updates, losses only suppported in GRAPH mode
+      self.assertListEqual(layer.updates, [])
+      self.assertListEqual(layer.losses, [])
     self.assertEqual(layer.built, False)
     layer = base_layers.Layer(name='my_layer', trainable=False)
     self.assertEqual(layer.trainable, False)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testAddWeight(self):
-    with self.test_session():
-      layer = base_layers.Layer(name='my_layer')
+    layer = base_layers.Layer(name='my_layer')
 
-      # Test basic variable creation.
-      variable = layer.add_variable(
-          'my_var', [2, 2], initializer=init_ops.zeros_initializer())
-      self.assertEqual(variable.name, 'my_layer/my_var:0')
-      self.assertListEqual(layer.variables, [variable])
-      self.assertListEqual(layer.trainable_variables, [variable])
-      self.assertListEqual(layer.non_trainable_variables, [])
-      self.assertListEqual(
-          layer.variables,
-          ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
-
-      # Test non-trainable variable creation.
-      # layer.add_variable should work even outside `build` and `call`.
-      variable_2 = layer.add_variable(
-          'non_trainable_var', [2, 2],
-          initializer=init_ops.zeros_initializer(),
-          trainable=False)
-      self.assertListEqual(layer.variables, [variable, variable_2])
-      self.assertListEqual(layer.trainable_variables, [variable])
-      self.assertListEqual(layer.non_trainable_variables, [variable_2])
-      self.assertEqual(
-          len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1)
-
-      # Test with regularizer.
+    # Test basic variable creation.
+    variable = layer.add_variable(
+        'my_var', [2, 2], initializer=init_ops.zeros_initializer())
+    self.assertEqual(variable.name, 'my_layer/my_var:0')
+    self.assertListEqual(layer.variables, [variable])
+    self.assertListEqual(layer.trainable_variables, [variable])
+    self.assertListEqual(layer.non_trainable_variables, [])
+    self.assertListEqual(layer.variables,
+                         ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))
+
+    # Test non-trainable variable creation.
+    # layer.add_variable should work even outside `build` and `call`.
+    variable_2 = layer.add_variable(
+        'non_trainable_var', [2, 2],
+        initializer=init_ops.zeros_initializer(),
+        trainable=False)
+    self.assertListEqual(layer.variables, [variable, variable_2])
+    self.assertListEqual(layer.trainable_variables, [variable])
+    self.assertListEqual(layer.non_trainable_variables, [variable_2])
+    self.assertEqual(
+        len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1)
+
+    if context.in_graph_mode():
+      # regularizers only supported in GRAPH mode.
       regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
       variable = layer.add_variable(
           'reg_var', [2, 2],
@@ -80,69 +87,63 @@ class BaseLayerTest(test.TestCase):
           regularizer=regularizer)
       self.assertEqual(len(layer.losses), 1)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testGetVariable(self):
-    with self.test_session():
 
-      class MyLayer(base_layers.Layer):
+    class MyLayer(base_layers.Layer):
 
-        def build(self, input_shape):
-          self.my_var = self.add_variable(
-              'my_var', [2, 2], initializer=init_ops.zeros_initializer())
+      def build(self, input_shape):
+        self.my_var = self.add_variable(
+            'my_var', [2, 2], initializer=init_ops.zeros_initializer())
 
-        def call(self, inputs):
-          return inputs * 2
+      def call(self, inputs):
+        return inputs * 2
 
-      layer = MyLayer(name='my_layer')
-      inputs = random_ops.random_uniform((5,), seed=1)
-      layer.apply(inputs)
-      layer.apply(inputs)
-      self.assertListEqual([v.name for v in layer.variables],
-                           ['my_layer/my_var:0'])
-
-      # Creating a layer with no scope leads to lazy construction of
-      # the scope at apply() time.  It uses scope "/base_name"
-      lazy_layer = MyLayer(_reuse=True)
-      with variable_scope.variable_scope('new_scope'):
-        # This should attempt to reuse 'my_var' in 'new_scope'
-        with self.assertRaisesRegexp(
-            ValueError, r'new_scope/my_layer/my_var does not exist'):
-          lazy_layer.apply(inputs)
-        with variable_scope.variable_scope('my_layer'):
-          variable_scope.get_variable('my_var', [2, 2])
-
-        # Smoke test: it runs.
-        lazy_layer.apply(inputs)
-        # The variables were created outside of the Layer, and
-        # reuse=True, so the Layer does not own them and they are not
-        # stored in its collection.
-        self.assertListEqual(lazy_layer.variables, [])
-        self.assertEqual(lazy_layer._scope.name, 'new_scope/my_layer')
-
-      # Creating a layer with no scope leads to lazy construction of
-      # the scope at apply() time.  If 'scope' argument is passed to
-      # apply(), it uses that scope when accessing variables.
-      lazy_layer = MyLayer(_reuse=True)
-      with variable_scope.variable_scope('new_scope') as new_scope:
-        # This should attempt to reuse 'my_var' in 'new_scope'
-        with self.assertRaisesRegexp(
-            ValueError, r'new_scope/my_var does not exist'):
-          lazy_layer.apply(inputs, scope=new_scope)
+    layer = MyLayer(name='my_layer')
+    inputs = random_ops.random_uniform((5,), seed=1)
+    layer.apply(inputs)
+    layer.apply(inputs)
+    self.assertListEqual([v.name for v in layer.variables],
+                         ['my_layer/my_var:0'])
+
+    # Creating a layer with no scope leads to lazy construction of
+    # the scope at apply() time.  It uses scope "/base_name"
+    lazy_layer = MyLayer(_reuse=True)
+    with variable_scope.variable_scope('new_scope'):
+      with variable_scope.variable_scope('my_layer'):
         variable_scope.get_variable('my_var', [2, 2])
 
-        # Smoke test: it runs.
-        lazy_layer.apply(inputs, scope=new_scope)
-        # The variables were created outside of the Layer, and
-        # reuse=True, so the Layer does not own them and they are not
-        # stored in its collection.
-        self.assertListEqual(lazy_layer.variables, [])
-        self.assertEqual(lazy_layer._scope.name, 'new_scope')
-
+      # Smoke test: it runs.
+      lazy_layer.apply(inputs)
+      # The variables were created outside of the Layer, and
+      # reuse=True, so the Layer does not own them and they are not
+      # stored in its collection.
+      self.assertListEqual(lazy_layer.variables, [])
+      self.assertEqual(lazy_layer._scope.name, 'new_scope/my_layer')
+
+    # Creating a layer with no scope leads to lazy construction of
+    # the scope at apply() time. If 'scope' argument is passed to
+    # apply(), it uses that scope when accessing variables.
+    lazy_layer = MyLayer(_reuse=True)
+    with variable_scope.variable_scope('new_scope') as new_scope:
+      variable_scope.get_variable('my_var', [2, 2])
+
+      # Smoke test: it runs.
+      lazy_layer.apply(inputs, scope=new_scope)
+      # The variables were created outside of the Layer, and
+      # reuse=True, so the Layer does not own them and they are not
+      # stored in its collection.
+      self.assertListEqual(lazy_layer.variables, [])
+      self.assertEqual(lazy_layer._scope.name, 'new_scope')
+
+    if context.in_graph_mode():
+      # Checking for graph equality is only done in GRAPH mode.
       with ops.Graph().as_default():
         inputs_ng = random_ops.random_uniform((5,), seed=1)
-        with self.assertRaisesRegexp(ValueError,
-                                     r'graph are not the same'):
+        with self.assertRaisesRegexp(ValueError, r'graph are not the same'):
           layer.apply(inputs_ng)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testCall(self):
 
     class MyLayer(base_layers.Layer):
@@ -154,9 +155,13 @@ class BaseLayerTest(test.TestCase):
     inputs = random_ops.random_uniform((5,), seed=1)
     outputs = layer.apply(inputs)
     self.assertEqual(layer.built, True)
-    self.assertEqual(outputs.op.name, 'my_layer/Square')
+    if context.in_graph_mode():
+      # op is only supported in GRAPH mode
+      self.assertEqual(outputs.op.name, 'my_layer/Square')
 
   def testFirstCallCanCreateVariablesButSecondCanNotWhenBuildEmpty(self):
+    # Note that this test is only run in Graph mode since with EAGER mode we can
+    # still create a new variable on second call.
 
     class MyLayer(base_layers.Layer):
 
@@ -177,15 +182,16 @@ class BaseLayerTest(test.TestCase):
     outputs = layer.apply(inputs)
     self.assertEqual(layer.built, True)
     self.assertEqual(outputs.op.name, 'my_layer/add')
-    self.assertListEqual(
-        [v.name for v in layer.variables], ['my_layer/my_var:0'])
+    self.assertListEqual([v.name
+                          for v in layer.variables], ['my_layer/my_var:0'])
     with self.assertRaisesRegexp(ValueError,
                                  'my_layer/this_will_break_on_second_call'):
       layer.apply(inputs)
     # The list of variables hasn't changed.
-    self.assertListEqual(
-        [v.name for v in layer.variables], ['my_layer/my_var:0'])
+    self.assertListEqual([v.name
+                          for v in layer.variables], ['my_layer/my_var:0'])
 
+  @test_util.run_in_graph_and_eager_modes()
   def testDeepCopy(self):
 
     class MyLayer(base_layers.Layer):
@@ -198,7 +204,9 @@ class BaseLayerTest(test.TestCase):
     inputs = random_ops.random_uniform((5,), seed=1)
     outputs = layer.apply(inputs)
     self.assertEqual(layer.built, True)
-    self.assertEqual(outputs.op.name, 'my_layer/Square')
+    if context.in_graph_mode():
+      # op only supported in GRAPH mode.
+      self.assertEqual(outputs.op.name, 'my_layer/Square')
 
     layer_copy = copy.deepcopy(layer)
     self.assertEqual(layer_copy.name, layer.name)
@@ -206,6 +214,7 @@ class BaseLayerTest(test.TestCase):
     self.assertEqual(layer_copy._graph, layer._graph)
     self.assertEqual(layer_copy._private_tensor, layer._private_tensor)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testScopeNaming(self):
 
     class PrivateLayer(base_layers.Layer):
@@ -253,6 +262,7 @@ class BaseLayerTest(test.TestCase):
       my_layer_scoped1.apply(inputs)
       self.assertEqual(my_layer_scoped1._scope.name, 'var_scope/my_layer_1')
 
+  @test_util.run_in_graph_and_eager_modes()
   def testInputSpecNdimCheck(self):
 
     class CustomerLayer(base_layers.Layer):
@@ -264,18 +274,22 @@ class BaseLayerTest(test.TestCase):
       def call(self, inputs):
         return inputs
 
-    layer = CustomerLayer()
-    with self.assertRaisesRegexp(ValueError,
-                                 r'requires a defined rank'):
-      layer.apply(array_ops.placeholder('int32'))
+    if context.in_graph_mode():
+      layer = CustomerLayer()
+      with self.assertRaisesRegexp(ValueError, r'requires a defined rank'):
+        layer.apply(array_ops.placeholder('int32'))
 
-    with self.assertRaisesRegexp(ValueError,
-                                 r'expected ndim=2'):
-      layer.apply(array_ops.placeholder('int32', shape=(None,)))
+    layer = CustomerLayer()
+    with self.assertRaisesRegexp(ValueError, r'expected ndim=2'):
+      layer.apply(constant_op.constant([1]))
 
+    # Note that we re-create the layer since in Eager mode, input spec checks
+    # only happen on first call.
     # Works
-    layer.apply(array_ops.placeholder('int32', shape=(None, None)))
+    layer = CustomerLayer()
+    layer.apply(constant_op.constant([[1], [2]]))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testInputSpecMinNdimCheck(self):
 
     class CustomerLayer(base_layers.Layer):
@@ -287,19 +301,23 @@ class BaseLayerTest(test.TestCase):
       def call(self, inputs):
         return inputs
 
-    layer = CustomerLayer()
-    with self.assertRaisesRegexp(ValueError,
-                                 r'requires a defined rank'):
-      layer.apply(array_ops.placeholder('int32'))
+    if context.in_graph_mode():
+      layer = CustomerLayer()
+      with self.assertRaisesRegexp(ValueError, r'requires a defined rank'):
+        layer.apply(array_ops.placeholder('int32'))
 
-    with self.assertRaisesRegexp(ValueError,
-                                 r'expected min_ndim=2'):
-      layer.apply(array_ops.placeholder('int32', shape=(None,)))
+    layer = CustomerLayer()
+    with self.assertRaisesRegexp(ValueError, r'expected min_ndim=2'):
+      layer.apply(constant_op.constant([1]))
 
     # Works
-    layer.apply(array_ops.placeholder('int32', shape=(None, None)))
-    layer.apply(array_ops.placeholder('int32', shape=(None, None, None)))
+    layer = CustomerLayer()
+    layer.apply(constant_op.constant([[1], [2]]))
+
+    layer = CustomerLayer()
+    layer.apply(constant_op.constant([[[1], [2]]]))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testInputSpecMaxNdimCheck(self):
 
     class CustomerLayer(base_layers.Layer):
@@ -311,19 +329,23 @@ class BaseLayerTest(test.TestCase):
       def call(self, inputs):
         return inputs
 
-    layer = CustomerLayer()
-    with self.assertRaisesRegexp(ValueError,
-                                 r'requires a defined rank'):
-      layer.apply(array_ops.placeholder('int32'))
+    if context.in_graph_mode():
+      layer = CustomerLayer()
+      with self.assertRaisesRegexp(ValueError, r'requires a defined rank'):
+        layer.apply(array_ops.placeholder('int32'))
 
-    with self.assertRaisesRegexp(ValueError,
-                                 r'expected max_ndim=2'):
-      layer.apply(array_ops.placeholder('int32', shape=(None, None, None)))
+    layer = CustomerLayer()
+    with self.assertRaisesRegexp(ValueError, r'expected max_ndim=2'):
+      layer.apply(constant_op.constant([[[1], [2]]]))
 
     # Works
-    layer.apply(array_ops.placeholder('int32', shape=(None, None)))
-    layer.apply(array_ops.placeholder('int32', shape=(None,)))
+    layer = CustomerLayer()
+    layer.apply(constant_op.constant([1]))
 
+    layer = CustomerLayer()
+    layer.apply(constant_op.constant([[1], [2]]))
+
+  @test_util.run_in_graph_and_eager_modes()
   def testInputSpecDtypeCheck(self):
 
     class CustomerLayer(base_layers.Layer):
@@ -336,13 +358,14 @@ class BaseLayerTest(test.TestCase):
         return inputs
 
     layer = CustomerLayer()
-    with self.assertRaisesRegexp(ValueError,
-                                 r'expected dtype=float32'):
-      layer.apply(array_ops.placeholder('int32'))
+    with self.assertRaisesRegexp(ValueError, r'expected dtype=float32'):
+      layer.apply(constant_op.constant(1, dtype=dtypes.int32))
 
     # Works
-    layer.apply(array_ops.placeholder('float32', shape=(None, None)))
+    layer = CustomerLayer()
+    layer.apply(constant_op.constant(1.0, dtype=dtypes.float32))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testInputSpecAxesCheck(self):
 
     class CustomerLayer(base_layers.Layer):
@@ -355,14 +378,16 @@ class BaseLayerTest(test.TestCase):
         return inputs
 
     layer = CustomerLayer()
-    with self.assertRaisesRegexp(ValueError,
-                                 r'expected axis'):
-      layer.apply(array_ops.placeholder('int32', shape=(None, 3)))
+    with self.assertRaisesRegexp(ValueError, r'expected axis'):
+      layer.apply(constant_op.constant([1, 2, 3]))
 
     # Works
-    layer.apply(array_ops.placeholder('int32', shape=(None, None, 2)))
-    layer.apply(array_ops.placeholder('int32', shape=(None, 2)))
+    layer = CustomerLayer()
+    layer.apply(constant_op.constant([1, 2]))
+    layer = CustomerLayer()
+    layer.apply(constant_op.constant([[1, 2], [3, 4], [5, 6]]))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testInputSpecShapeCheck(self):
 
     class CustomerLayer(base_layers.Layer):
@@ -375,14 +400,14 @@ class BaseLayerTest(test.TestCase):
         return inputs
 
     layer = CustomerLayer()
-    with self.assertRaisesRegexp(ValueError,
-                                 r'expected shape'):
-      layer.apply(array_ops.placeholder('int32', shape=(None, 2)))
+    with self.assertRaisesRegexp(ValueError, r'expected shape'):
+      layer.apply(constant_op.constant([[1, 2]]))
 
     # Works
-    layer.apply(array_ops.placeholder('int32', shape=(None, 3)))
-    layer.apply(array_ops.placeholder('int32', shape=(2, 3)))
+    layer = CustomerLayer()
+    layer.apply(constant_op.constant([[1, 2, 3], [4, 5, 6]]))
 
+  @test_util.run_in_graph_and_eager_modes()
   def testNoInputSpec(self):
 
     class CustomerLayer(base_layers.Layer):
@@ -396,9 +421,12 @@ class BaseLayerTest(test.TestCase):
 
     layer = CustomerLayer()
 
+    layer.apply(constant_op.constant(1))
+
     # Works
-    layer.apply(array_ops.placeholder('int32'))
-    layer.apply(array_ops.placeholder('int32', shape=(2, 3)))
+    if context.in_graph_mode():
+      layer.apply(array_ops.placeholder('int32'))
+      layer.apply(array_ops.placeholder('int32', shape=(2, 3)))
 
   def test_get_updates_for(self):
     a = base_layers.Input(shape=(2,))
@@ -476,10 +504,11 @@ class BaseLayerTest(test.TestCase):
       _ = new_dense.output_shape
 
   def testTopologicalAttributesMultiOutputLayer(self):
+
     class PowersLayer(base_layers.Layer):
 
       def call(self, inputs):
-        return [inputs ** 2, inputs ** 3]
+        return [inputs**2, inputs**3]
 
     x = base_layers.Input(shape=(32,))
     test_layer = PowersLayer()
@@ -491,6 +520,7 @@ class BaseLayerTest(test.TestCase):
     self.assertEqual(test_layer.output_shape, [(None, 32), (None, 32)])
 
   def testTopologicalAttributesMultiInputLayer(self):
+
     class AddLayer(base_layers.Layer):
 
       def call(self, inputs):
@@ -507,6 +537,7 @@ class BaseLayerTest(test.TestCase):
     self.assertEqual(test_layer.input_shape, [(None, 32), (None, 32)])
     self.assertEqual(test_layer.output_shape, (None, 32))
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_count_params(self):
     dense = core_layers.Dense(16)
     dense.build((None, 4))
@@ -652,7 +683,7 @@ class NetworkTest(test.TestCase):
     class PowersLayer(base_layers.Layer):
 
       def call(self, inputs):
-        return [inputs ** 2, inputs ** 3]
+        return [inputs**2, inputs**3]
 
     x = base_layers.Input(shape=(32,))
     p1, p2 = PowersLayer()(x)  # pylint: disable=not-callable
@@ -673,7 +704,7 @@ class NetworkTest(test.TestCase):
 
   def testNetworkAttributes(self):
     x = base_layers.Input(shape=(32,))
-    z = core_layers.Dense(2, kernel_regularizer=lambda x: 0.01 * (x ** 2))(x)
+    z = core_layers.Dense(2, kernel_regularizer=lambda x: 0.01 * (x**2))(x)
     dense = core_layers.Dense(2, name='dense')
     dense.add_update(1)
     y = dense(z)
@@ -795,6 +826,7 @@ class NetworkTest(test.TestCase):
     self.assertEqual(len(network.layers), 2)
     self.assertEqual(network.layers[0].sparse, True)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testMaskingSingleInput(self):
 
     class MaskedLayer(base_layers.Layer):
@@ -807,19 +839,29 @@ class NetworkTest(test.TestCase):
       def compute_mask(self, inputs, mask=None):
         return array_ops.ones_like(inputs)
 
-    x = base_layers.Input(shape=(32,))
-    y = MaskedLayer()(x)  # pylint: disable=not-callable
-    network = base_layers.Network(x, y)
-
-    # test callability on Input
-    x_2 = base_layers.Input(shape=(32,))
-    y_2 = network(x_2)
-    self.assertEqual(y_2.get_shape().as_list(), [None, 32])
-
-    # test callability on regular tensor
-    x_2 = array_ops.placeholder(dtype='float32', shape=(None, 32))
-    y_2 = network(x_2)
-    self.assertEqual(y_2.get_shape().as_list(), [None, 32])
+    if context.in_graph_mode():
+      x = base_layers.Input(shape=(32,))
+      y = MaskedLayer()(x)  # pylint: disable=not-callable
+      network = base_layers.Network(x, y)
+
+      # test callability on Input
+      x_2 = base_layers.Input(shape=(32,))
+      y_2 = network(x_2)
+      self.assertEqual(y_2.get_shape().as_list(), [None, 32])
+
+      # test callability on regular tensor
+      x_2 = array_ops.placeholder(dtype='float32', shape=(None, 32))
+      y_2 = network(x_2)
+      self.assertEqual(y_2.get_shape().as_list(), [None, 32])
+    else:
+      a = constant_op.constant([2] * 32)
+      mask = constant_op.constant([0, 1] * 16)
+      a._keras_mask = mask
+      b = MaskedLayer().apply(a)
+      self.assertTrue(hasattr(b, '_keras_mask'))
+      self.assertAllEqual(self.evaluate(array_ops.ones_like(mask)),
+                          self.evaluate(getattr(b, '_keras_mask')))
+      self.assertAllEqual(self.evaluate(a * mask), self.evaluate(b))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py
index a2906802df..f63f004726 100644
--- a/tensorflow/python/layers/core.py
+++ b/tensorflow/python/layers/core.py
@@ -26,6 +26,7 @@ import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
@@ -148,13 +149,14 @@ class Dense(base.Layer):
   def call(self, inputs):
     inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
     shape = inputs.get_shape().as_list()
-    output_shape = shape[:-1] + [self.units]
-    if len(output_shape) > 2:
+    if len(shape) > 2:
       # Broadcasting is required for the inputs.
       outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1],
                                                              [0]])
       # Reshape the output back to the original ndim of the input.
-      outputs.set_shape(output_shape)
+      if context.in_graph_mode():
+        output_shape = shape[:-1] + [self.units]
+        outputs.set_shape(output_shape)
     else:
       outputs = standard_ops.matmul(inputs, self.kernel)
     if self.use_bias:
@@ -287,6 +289,7 @@ class Dropout(base.Layer):
     return self.noise_shape
 
   def call(self, inputs, training=False):
+
     def dropped_inputs():
       return nn.dropout(inputs, 1  - self.rate,
                         noise_shape=self._get_noise_shape(inputs),
diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 6f315f8c4d..188bdeb85c 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -20,9 +20,11 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.layers import core as core_layers
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
@@ -36,6 +38,7 @@ from tensorflow.python.platform import test
 
 class DenseTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testDenseProperties(self):
     dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')
     self.assertEqual(dense.units, 2)
@@ -53,10 +56,12 @@ class DenseTest(test.TestCase):
     dense.apply(random_ops.random_uniform((5, 2)))
     self.assertEqual(dense.name, 'dense_2')
 
+  @test_util.run_in_graph_and_eager_modes()
   def testCall(self):
     dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')
-    inputs = random_ops.random_uniform((5, 2), seed=1)
-    _ = dense(inputs)
+    inputs = random_ops.random_uniform((5, 4), seed=1)
+    outputs = dense(inputs)
+    self.assertListEqual([5, 2], outputs.get_shape().as_list())
     self.assertListEqual(dense.variables, [dense.kernel, dense.bias])
     self.assertListEqual(dense.trainable_variables, [dense.kernel, dense.bias])
     self.assertListEqual(dense.non_trainable_variables, [])
@@ -65,6 +70,14 @@ class DenseTest(test.TestCase):
     self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
     self.assertEqual(dense.bias.name, 'my_dense/bias:0')
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testCallTensorDot(self):
+    dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')
+    inputs = random_ops.random_uniform((5, 4, 3), seed=1)
+    outputs = dense(inputs)
+    self.assertListEqual([5, 4, 2], outputs.get_shape().as_list())
+
+  @test_util.run_in_graph_and_eager_modes()
   def testNoBias(self):
     dense = core_layers.Dense(2, use_bias=False, name='my_dense')
     inputs = random_ops.random_uniform((5, 2), seed=1)
@@ -77,6 +90,7 @@ class DenseTest(test.TestCase):
     self.assertEqual(dense.kernel.name, 'my_dense/kernel:0')
     self.assertEqual(dense.bias, None)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testNonTrainable(self):
     dense = core_layers.Dense(2, trainable=False, name='my_dense')
     inputs = random_ops.random_uniform((5, 2), seed=1)
@@ -88,6 +102,7 @@ class DenseTest(test.TestCase):
     self.assertEqual(
         len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 0)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testOutputShape(self):
     dense = core_layers.Dense(7, activation=nn_ops.relu, name='my_dense')
     inputs = random_ops.random_uniform((5, 3), seed=1)
@@ -127,16 +142,19 @@ class DenseTest(test.TestCase):
     dense = core_layers.Dense(4, name='my_dense')
     dense(inputs)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testActivation(self):
     dense = core_layers.Dense(2, activation=nn_ops.relu, name='dense1')
     inputs = random_ops.random_uniform((5, 3), seed=1)
     outputs = dense(inputs)
-    self.assertEqual(outputs.op.name, 'dense1/Relu')
+    if context.in_graph_mode():
+      self.assertEqual(outputs.op.name, 'dense1/Relu')
 
     dense = core_layers.Dense(2, name='dense2')
     inputs = random_ops.random_uniform((5, 3), seed=1)
     outputs = dense(inputs)
-    self.assertEqual(outputs.op.name, 'dense2/BiasAdd')
+    if context.in_graph_mode():
+      self.assertEqual(outputs.op.name, 'dense2/BiasAdd')
 
   def testActivityRegularizer(self):
     regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
@@ -179,15 +197,18 @@ class DenseTest(test.TestCase):
     self.assertEqual(len(loss_keys), 1)
     self.assertListEqual(dense.losses, loss_keys)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testFunctionalDense(self):
     inputs = random_ops.random_uniform((5, 3), seed=1)
     outputs = core_layers.dense(
         inputs, 2, activation=nn_ops.relu, name='my_dense')
     self.assertEqual(
         len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2)
-    self.assertEqual(outputs.op.name, 'my_dense/Relu')
+    if context.in_graph_mode():
+      self.assertEqual(outputs.op.name, 'my_dense/Relu')
     self.assertEqual(outputs.get_shape().as_list(), [5, 2])
 
+  @test_util.run_in_graph_and_eager_modes()
   def testFunctionalDenseTwice(self):
     inputs = random_ops.random_uniform((5, 3), seed=1)
     core_layers.dense(inputs, 2)
@@ -197,6 +218,7 @@ class DenseTest(test.TestCase):
     self.assertEqual(len(vars1), 2)
     self.assertEqual(len(vars2), 4)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testFunctionalDenseTwiceReuse(self):
     inputs = random_ops.random_uniform((5, 3), seed=1)
     core_layers.dense(inputs, 2, name='my_dense')
@@ -205,6 +227,7 @@ class DenseTest(test.TestCase):
     vars2 = variables.trainable_variables()
     self.assertEqual(vars1, vars2)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testFunctionalDenseTwiceReuseFromScope(self):
     with variable_scope.variable_scope('scope'):
       inputs = random_ops.random_uniform((5, 3), seed=1)
@@ -215,20 +238,23 @@ class DenseTest(test.TestCase):
       vars2 = variables.trainable_variables()
     self.assertEqual(vars1, vars2)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testFunctionalDenseInitializerFromScope(self):
-    with self.test_session() as sess:
-      with variable_scope.variable_scope(
-          'scope', initializer=init_ops.ones_initializer()):
-        inputs = random_ops.random_uniform((5, 3), seed=1)
-        core_layers.dense(inputs, 2)
-        sess.run(variables.global_variables_initializer())
-        weights = sess.run(variables.trainable_variables())
-        self.assertEqual(len(weights), 2)
-        # Check that the matrix weights got initialized to ones (from scope).
-        self.assertAllClose(weights[0], np.ones((3, 2)))
-        # Check that the bias still got initialized to zeros.
-        self.assertAllClose(weights[1], np.zeros((2)))
-
+    with variable_scope.variable_scope(
+        'scope', initializer=init_ops.ones_initializer()):
+      inputs = random_ops.random_uniform((5, 3), seed=1)
+      core_layers.dense(inputs, 2)
+      if context.in_graph_mode():
+        self.evaluate(variables.global_variables_initializer())
+      weights = variables.trainable_variables()
+      self.assertEqual(len(weights), 2)
+      # Check that the matrix weights got initialized to ones (from scope).
+      self.assertAllClose(
+          self.evaluate(weights[0].read_value()), np.ones((3, 2)))
+      # Check that the bias still got initialized to zeros.
+      self.assertAllClose(self.evaluate(weights[1].read_value()), np.zeros((2)))
+
+  @test_util.run_in_graph_and_eager_modes()
   def testFunctionalDenseWithCustomGetter(self):
     called = [0]
 
@@ -241,6 +267,7 @@ class DenseTest(test.TestCase):
       core_layers.dense(inputs, 2)
     self.assertEqual(called[0], 2)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testFunctionalDenseInScope(self):
     with variable_scope.variable_scope('test'):
       inputs = random_ops.random_uniform((5, 3), seed=1)
@@ -258,6 +285,7 @@ class DenseTest(test.TestCase):
       var = variables.trainable_variables()[4]
       self.assertEqual(var.name, 'test2/dense/kernel:0')
 
+  @test_util.run_in_graph_and_eager_modes()
   def testComputeOutputShape(self):
     dense = core_layers.Dense(2, activation=nn_ops.relu, name='dense1')
     ts = tensor_shape.TensorShape
@@ -279,6 +307,7 @@ class DenseTest(test.TestCase):
         dense._compute_output_shape(ts([None, 4, 3])).as_list())
     # pylint: enable=protected-access
 
+  @test_util.run_in_graph_and_eager_modes()
   def testConstraints(self):
     k_constraint = lambda x: x / math_ops.reduce_sum(x)
     b_constraint = lambda x: x / math_ops.reduce_max(x)
@@ -293,6 +322,7 @@ class DenseTest(test.TestCase):
 
 class DropoutTest(test.TestCase):
 
+  @test_util.run_in_graph_and_eager_modes()
   def testDropoutProperties(self):
     dp = core_layers.Dropout(0.5, name='dropout')
     self.assertEqual(dp.rate, 0.5)
@@ -300,17 +330,18 @@ class DropoutTest(test.TestCase):
     dp.apply(array_ops.ones(()))
     self.assertEqual(dp.name, 'dropout')
 
+  @test_util.run_in_graph_and_eager_modes()
   def testBooleanLearningPhase(self):
-    with self.test_session() as sess:
-      dp = core_layers.Dropout(0.5)
-      inputs = array_ops.ones((5, 3))
-      dropped = dp.apply(inputs, training=True)
-      sess.run(variables.global_variables_initializer())
-      np_output = sess.run(dropped)
-      self.assertAlmostEqual(0., np_output.min())
-      dropped = dp.apply(inputs, training=False)
-      np_output = sess.run(dropped)
-      self.assertAllClose(np.ones((5, 3)), np_output)
+    dp = core_layers.Dropout(0.5)
+    inputs = array_ops.ones((5, 3))
+    dropped = dp.apply(inputs, training=True)
+    if context.in_graph_mode():
+      self.evaluate(variables.global_variables_initializer())
+    np_output = self.evaluate(dropped)
+    self.assertAlmostEqual(0., np_output.min())
+    dropped = dp.apply(inputs, training=False)
+    np_output = self.evaluate(dropped)
+    self.assertAllClose(np.ones((5, 3)), np_output)
 
   def testDynamicLearningPhase(self):
     with self.test_session() as sess:
@@ -318,35 +349,34 @@ class DropoutTest(test.TestCase):
       inputs = array_ops.ones((5, 5))
       training = array_ops.placeholder(dtype='bool')
       dropped = dp.apply(inputs, training=training)
-      sess.run(variables.global_variables_initializer())
+      self.evaluate(variables.global_variables_initializer())
       np_output = sess.run(dropped, feed_dict={training: True})
       self.assertAlmostEqual(0., np_output.min())
       np_output = sess.run(dropped, feed_dict={training: False})
       self.assertAllClose(np.ones((5, 5)), np_output)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testCustomNoiseShape(self):
-    with self.test_session() as sess:
-      inputs = array_ops.ones((5, 3, 2))
-      noise_shape = [5, 1, 2]
-      dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
-      dropped = dp.apply(inputs, training=True)
-      sess.run(variables.global_variables_initializer())
-      np_output = sess.run(dropped)
-      self.assertAlmostEqual(0., np_output.min())
-      self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])
-
+    inputs = array_ops.ones((5, 3, 2))
+    noise_shape = [5, 1, 2]
+    dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
+    dropped = dp.apply(inputs, training=True)
+    self.evaluate(variables.global_variables_initializer())
+    np_output = self.evaluate(dropped)
+    self.assertAlmostEqual(0., np_output.min())
+    self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])
+
+  @test_util.run_in_graph_and_eager_modes()
   def testFunctionalDropout(self):
-    with self.test_session() as sess:
-      inputs = array_ops.ones((5, 5))
-      training = array_ops.placeholder(dtype='bool')
-      dropped = core_layers.dropout(inputs, 0.5, training=training, seed=1)
-      self.assertEqual(dropped.op.name, 'dropout/cond/Merge')
-
-      sess.run(variables.global_variables_initializer())
-      np_output = sess.run(dropped, feed_dict={training: True})
-      self.assertAlmostEqual(0., np_output.min())
-      np_output = sess.run(dropped, feed_dict={training: False})
-      self.assertAllClose(np.ones((5, 5)), np_output)
+    inputs = array_ops.ones((5, 5))
+    dropped = core_layers.dropout(inputs, 0.5, training=True, seed=1)
+    if context.in_graph_mode():
+      self.evaluate(variables.global_variables_initializer())
+    np_output = self.evaluate(dropped)
+    self.assertAlmostEqual(0., np_output.min())
+    dropped = core_layers.dropout(inputs, 0.5, training=False, seed=1)
+    np_output = self.evaluate(dropped)
+    self.assertAllClose(np.ones((5, 5)), np_output)
 
   def testDynamicRate(self):
     with self.test_session() as sess:
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 467567f30e..d4b1663507 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1368,21 +1368,21 @@ def _flatten_outer_dims(logits):
   output = array_ops.reshape(logits, array_ops.concat([[-1], last_dim_size], 0))
 
   # Set output shape if known.
-  shape = logits.get_shape()
-  if shape is not None and shape.dims is not None:
-    shape = shape.as_list()
-    product = 1
-    product_valid = True
-    for d in shape[:-1]:
-      if d is None:
-        product_valid = False
-        break
-      else:
-        product *= d
-    # Only need to set shape if in graph mode
-    if product_valid and context.in_graph_mode():
-      output_shape = [product, shape[-1]]
-      output.set_shape(output_shape)
+  if context.in_graph_mode():
+    shape = logits.get_shape()
+    if shape is not None and shape.dims is not None:
+      shape = shape.as_list()
+      product = 1
+      product_valid = True
+      for d in shape[:-1]:
+        if d is None:
+          product_valid = False
+          break
+        else:
+          product *= d
+      if product_valid:
+        output_shape = [product, shape[-1]]
+        output.set_shape(output_shape)
 
   return output
 
@@ -1605,7 +1605,7 @@ def softmax_cross_entropy_with_logits(_sentinel=None,  # pylint: disable=invalid
 
   # Make shape inference work since reshape and transpose may erase its static
   # shape.
-  if shape is not None and shape.dims is not None and context.in_graph_mode():
+  if context.in_graph_mode() and shape is not None and shape.dims is not None:
     shape = shape.as_list()
     del shape[dim]
     cost.set_shape(shape)
@@ -1922,7 +1922,8 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None):  # pylint: di
     # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
     binary_tensor = math_ops.floor(random_tensor)
     ret = math_ops.div(x, keep_prob) * binary_tensor
-    ret.set_shape(x.get_shape())
+    if context.in_graph_mode():
+      ret.set_shape(x.get_shape())
     return ret
 
 
-- 
GitLab


From 9a86c0d059bd9893d7bc5885280caa325127ff4e Mon Sep 17 00:00:00 2001
From: Peter Hawkins 
Date: Wed, 30 Aug 2017 11:15:00 -0700
Subject: [PATCH 101/294] [TF:XLA] Add implementations of Tan, Sinh, Cosh,
 Asinh, Acosh, Atanh, Expm1.

PiperOrigin-RevId: 167020800
---
 tensorflow/compiler/tests/randomized_tests.cc | 80 +++++++++++++++++--
 tensorflow/compiler/tests/unary_ops_test.py   | 41 ++++++++++
 .../compiler/tf2xla/kernels/unary_ops.cc      | 29 +++++++
 3 files changed, 145 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc
index e380fdd7f4..a342e37e0e 100644
--- a/tensorflow/compiler/tests/randomized_tests.cc
+++ b/tensorflow/compiler/tests/randomized_tests.cc
@@ -829,6 +829,13 @@ TEST_F(OpTest, Abs) {
   });
 }
 
+TEST_F(OpTest, Acosh) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Acosh").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, Add) {
   Repeatedly([this]() {
     DataType type = Choose({DT_INT32, DT_FLOAT});
@@ -881,6 +888,20 @@ TEST_F(OpTest, Any) {
   });
 }
 
+TEST_F(OpTest, Asinh) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Asinh").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
+TEST_F(OpTest, Atanh) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Atanh").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, AvgPool) {
   Repeatedly([this]() {
     std::uniform_int_distribution random_int(1, 5);
@@ -1372,6 +1393,20 @@ TEST_F(OpTest, DepthwiseConv2DBackpropFilter) {
   });
 }
 
+TEST_F(OpTest, Cos) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Cos").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
+TEST_F(OpTest, Cosh) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Cosh").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, DepthwiseConv2DBackpropInput) {
   Repeatedly([this]() {
     WindowedSpatialDims d = ChooseWindowedSpatialDims(2);
@@ -1459,11 +1494,11 @@ TEST_F(OpTest, DynamicStitch) {
     } while (size == 0);
 
     // Shuffle the range of indices that cover the output.
-    // TODO(phawkins): The documentation for DynamicStitch doesn't require that
-    // the indices cover all positions of the output. The XLA implementation
-    // does so require. However, the native TF implementation leaves undefined
-    // values if we don't cover everything, so we can't really test that case
-    // anyway.
+    // TODO(phawkins): The documentation for DynamicStitch doesn't require
+    // that the indices cover all positions of the output. The XLA
+    // implementation does so require. However, the native TF implementation
+    // leaves undefined values if we don't cover everything, so we can't
+    // really test that case anyway.
     std::vector indices(size);
     std::iota(indices.begin(), indices.end(), 0);
     std::shuffle(indices.begin(), indices.end(), generator());
@@ -1540,6 +1575,13 @@ TEST_F(OpTest, Exp) {
   });
 }
 
+TEST_F(OpTest, Expm1) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Expm1").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, ExpandDims) {
   Repeatedly([this]() {
     DataType type = Choose(kAllXlaTypes);
@@ -1675,6 +1717,13 @@ TEST_F(OpTest, Log) {
   });
 }
 
+TEST_F(OpTest, Log1p) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Log1p").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, LogicalAnd) {
   Repeatedly([this]() {
     auto dims = BroadcastableDims();
@@ -2272,6 +2321,20 @@ TEST_F(OpTest, Sign) {
   });
 }
 
+TEST_F(OpTest, Sin) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Sin").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
+TEST_F(OpTest, Sinh) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Sinh").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, Size) {
   Repeatedly([this]() {
     DataType type = Choose({DT_INT32, DT_FLOAT});
@@ -2665,6 +2728,13 @@ TEST_F(OpTest, StridedSliceGrad) {
   });
 }
 
+TEST_F(OpTest, Tan) {
+  Repeatedly([this]() {
+    return ExpectTfAndXlaOutputsAreClose(
+        OpTestBuilder("Tan").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, Tanh) {
   Repeatedly([this]() {
     return ExpectTfAndXlaOutputsAreClose(
diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py
index cfc2a0c85e..ca2a438005 100644
--- a/tensorflow/compiler/tests/unary_ops_test.py
+++ b/tensorflow/compiler/tests/unary_ops_test.py
@@ -117,16 +117,45 @@ class UnaryOpsTest(XLATestCase):
 
   def testFloatOps(self):
     for dtype in self.float_types:
+      self._assertOpOutputMatchesExpected(
+          math_ops.acosh,
+          np.array([1, 2, 3, 4], dtype=dtype),
+          expected=np.array([0, 1.3169579, 1.76274717, 2.06343707],
+                            dtype=dtype))
+
+      self._assertOpOutputMatchesExpected(
+          math_ops.asinh,
+          np.array([1, 2, 3, 4], dtype=dtype),
+          expected=np.array([0.88137359, 1.44363548, 1.81844646, 2.09471255],
+                            dtype=dtype))
+
+      self._assertOpOutputMatchesExpected(
+          math_ops.atanh,
+          np.array([0.1, 0.2, 0.3, 0.4], dtype=dtype),
+          expected=np.array([0.10033535, 0.20273255, 0.3095196, 0.42364893],
+                            dtype=dtype))
+
       self._assertOpOutputMatchesExpected(
           math_ops.ceil,
           np.array([[-1.7, 1.2]], dtype=dtype),
           expected=np.array([[-1, 2]], dtype=dtype))
 
+      self._assertOpOutputMatchesExpected(
+          math_ops.cosh,
+          np.array([1, 2, 3, 4], dtype=dtype),
+          expected=np.array([1.54308063, 3.76219569, 10.067662, 27.30823284],
+                            dtype=dtype))
+
       self._assertOpOutputMatchesExpected(
           math_ops.exp,
           np.array([[-1, 1]], dtype=dtype),
           expected=np.array([[0.36787945, 2.7182817]], dtype=dtype))
 
+      self._assertOpOutputMatchesExpected(
+          math_ops.expm1,
+          np.array([[-1, 1]], dtype=dtype),
+          expected=np.array([[-0.63212056, 1.71828183]], dtype=dtype))
+
       self._assertOpOutputMatchesExpected(
           math_ops.floor,
           np.array([[-1.7, 1.2]], dtype=dtype),
@@ -197,11 +226,23 @@ class UnaryOpsTest(XLATestCase):
           np.array([-300, -150, 0, 150, 300], dtype=dtype),
           expected=np.array([0, 0, 0.5, 1, 1], dtype=dtype))
 
+      self._assertOpOutputMatchesExpected(
+          math_ops.sinh,
+          np.array([1, 2, 3, 4], dtype=dtype),
+          expected=np.array([1.17520119, 3.62686041, 10.01787493, 27.2899172],
+                            dtype=dtype))
+
       self._assertOpOutputMatchesExpected(
           math_ops.sqrt,
           np.array([[4, 9]], dtype=dtype),
           expected=np.array([[2, 3]], dtype=dtype))
 
+      self._assertOpOutputMatchesExpected(
+          math_ops.tan,
+          np.array([1, 2, 3, 4], dtype=dtype),
+          expected=np.array([1.55740772, -2.18503986, -0.14254654, 1.15782128],
+                            dtype=dtype))
+
       self._assertOpOutputMatchesExpected(
           math_ops.tanh,
           np.array(
diff --git a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc
index 626ddd17d3..7b39f0533b 100644
--- a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc
@@ -43,10 +43,35 @@ namespace {
 
 // Return x if x>0, otherwise -x.
 XLAJIT_MAKE_UNARY(Abs, b->Abs(x));
+
+// acosh(x) = log(x + sqrt(x^2 - 1))
+XLAJIT_MAKE_UNARY(
+    Acosh,
+    b->Log(b->Add(x, b->Pow(b->Sub(b->Mul(x, x),
+                                   XlaHelpers::One(b, input_type(0))),
+                            XlaHelpers::FloatLiteral(b, input_type(0), 0.5)))));
+// asinh(x) = log(x + sqrt(x^2 + 1))
+XLAJIT_MAKE_UNARY(
+    Asinh,
+    b->Log(b->Add(x, b->Pow(b->Add(b->Mul(x, x),
+                                   XlaHelpers::One(b, input_type(0))),
+                            XlaHelpers::FloatLiteral(b, input_type(0), 0.5)))));
+// atanh(x) = 0.5 * log((1 + x) / (1 - x))
+XLAJIT_MAKE_UNARY(
+    Atanh, b->Mul(b->Log(b->Div(b->Add(XlaHelpers::One(b, input_type(0)), x),
+                                b->Sub(XlaHelpers::One(b, input_type(0)), x))),
+                  XlaHelpers::FloatLiteral(b, input_type(0), 0.5)));
 XLAJIT_MAKE_UNARY(Ceil, b->Ceil(x));
 XLAJIT_MAKE_UNARY(Cos, b->Cos(x));
+XLAJIT_MAKE_UNARY(Cosh,
+                  b->Mul(b->Add(b->Exp(x), b->Exp(b->Neg(x))),
+                         XlaHelpers::FloatLiteral(b, input_type(0), 0.5)));
 XLAJIT_MAKE_UNARY(Sin, b->Sin(x));
 XLAJIT_MAKE_UNARY(Exp, b->Exp(x));
+
+// TODO(b/34703906): use a more accurate implementation of expm1.
+XLAJIT_MAKE_UNARY(Expm1, b->Sub(b->Exp(x), XlaHelpers::One(b, input_type(0))));
+
 XLAJIT_MAKE_UNARY(Floor, b->Floor(x));
 // Returns 0 if x is 0, -1 if x < 0 and 1 if x > 0.
 XLAJIT_MAKE_UNARY(Sign, b->Sign(x));
@@ -92,11 +117,15 @@ XLAJIT_MAKE_UNARY(Round, Round(b, input_type(0), x));
 XLAJIT_MAKE_UNARY(Rsqrt,
                   b->Pow(x, XlaHelpers::FloatLiteral(b, input_type(0), -0.5)));
 XLAJIT_MAKE_UNARY(Sigmoid, Sigmoid(b, input_type(0), x));
+XLAJIT_MAKE_UNARY(Sinh,
+                  b->Mul(b->Sub(b->Exp(x), b->Exp(b->Neg(x))),
+                         XlaHelpers::FloatLiteral(b, input_type(0), 0.5)));
 XLAJIT_MAKE_UNARY(Softplus,
                   b->Log(b->Add(b->Exp(x), XlaHelpers::One(b, input_type(0)))));
 XLAJIT_MAKE_UNARY(Sqrt,
                   b->Pow(x, XlaHelpers::FloatLiteral(b, input_type(0), 0.5)));
 XLAJIT_MAKE_UNARY(Square, b->Mul(x, x));
+XLAJIT_MAKE_UNARY(Tan, b->Div(b->Sin(x), b->Cos(x)));
 XLAJIT_MAKE_UNARY(Tanh, b->Tanh(x));
 
 #undef XLAJIT_MAKE_UNARY
-- 
GitLab


From 3157ec2343f9b3695cf7949ddf7528e6ba2252f2 Mon Sep 17 00:00:00 2001
From: Mark Heffernan 
Date: Wed, 30 Aug 2017 11:37:09 -0700
Subject: [PATCH 102/294] Add check in HloVerifier that the size of a bitcast's
 output matches the size of its operand.

PiperOrigin-RevId: 167024560
---
 tensorflow/compiler/xla/service/BUILD         |  1 -
 .../xla/service/algebraic_simplifier.cc       |  5 ----
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  2 +-
 .../compiler/xla/service/gpu/gpu_compiler.cc  | 25 +++++++++++--------
 .../compiler/xla/service/hlo_verifier.cc      | 12 ++++++++-
 .../compiler/xla/service/hlo_verifier.h       |  6 +++++
 6 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index fc639ec573..9d4e7fc254 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1009,7 +1009,6 @@ cc_library(
         ":hlo",
         ":hlo_pass",
         ":hlo_query",
-        ":hlo_verifier",
         ":shape_inference",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 2dcba6996c..74f8e3143d 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -29,7 +29,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_query.h"
-#include "tensorflow/compiler/xla/service/hlo_verifier.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
@@ -1648,10 +1647,6 @@ StatusOr AlgebraicSimplifier::Run(HloModule* module) {
   }
   XLA_VLOG_LINES(2,
                  "AlgebraicSimplifier::Run(), after:\n" + module->ToString());
-
-  HloVerifier verifier;
-  TF_DCHECK_OK(verifier.Run(module).status());
-
   return changed;
 }
 
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 9c90c84d65..839fe48488 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -253,7 +253,7 @@ class CollectProfileCandidates : public DfsHloVisitorWithDefault {
 Status CpuCompiler::RunHloPasses(HloModule* module) {
   // Optimization pipeline.
   HloPassPipeline pipeline("CPU");
-  pipeline.AddInvariantChecker();
+  pipeline.AddInvariantChecker(ShapeSizeBytesFunction());
 
   ReducePrecisionInsertion::AddPasses(
       &pipeline, module->config().debug_options(),
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 14054b3553..7f5be602be 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -120,18 +120,19 @@ string GetLibdeviceDir(const HloModuleConfig& config) {
 }
 
 // Runs optimization passes on the given HLO module.
-tensorflow::Status OptimizeHloModule(HloModule* hlo_module,
-                                     const se::DeviceDescription& device_desc) {
+tensorflow::Status OptimizeHloModule(
+    HloModule* hlo_module, const se::DeviceDescription& device_desc,
+    const HloCostAnalysis::ShapeSizeFunction& shape_size_function) {
   {
     HloPassPipeline pipeline("optimization");
-    pipeline.AddInvariantChecker();
+    pipeline.AddInvariantChecker(shape_size_function);
     ReducePrecisionInsertion::AddPasses(
         &pipeline, hlo_module->config().debug_options(),
         ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
     {
       auto& pass =
           pipeline.AddPass>("simplification");
-      pass.AddInvariantChecker();
+      pass.AddInvariantChecker(shape_size_function);
 
       // TODO(b/62764704): Do not rewrite on GPU, use cuDNN's BatchNorm APIs
       // instead.
@@ -160,14 +161,14 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module,
   }
   {
     HloPassFix fusion("fusion");
-    fusion.AddInvariantChecker();
+    fusion.AddInvariantChecker(shape_size_function);
     fusion.AddPass(/*may_duplicate=*/false);
     fusion.AddPass(/*may_duplicate=*/true);
     fusion.AddPass();
     TF_RETURN_IF_ERROR(fusion.Run(hlo_module).status());
 
     HloPassPipeline reduce_pipeline("reduce-precision");
-    reduce_pipeline.AddInvariantChecker();
+    reduce_pipeline.AddInvariantChecker(shape_size_function);
     ReducePrecisionInsertion::AddPasses(
         &reduce_pipeline, hlo_module->config().debug_options(),
         ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
@@ -185,14 +186,16 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module,
 
 // Modifies the given HLO module so that it will be accepted by IrEmitter.
 // Unlike optimization passes, the passes are necessary for correctness.
-tensorflow::Status PrepareHloModuleForIrEmitting(HloModule* hlo_module) {
+tensorflow::Status PrepareHloModuleForIrEmitting(
+    HloModule* hlo_module,
+    const HloCostAnalysis::ShapeSizeFunction& shape_size_function) {
   // In some cases, we have to place the result of an instruction in a temporary
   // buffer. For instance, the buffer that holds an external parameter is
   // assumed immutable at this point, and should not be reused for output
   // (b/27180329). Therefore, in that case, we set the output to be a copy of
   // the parameter.
   HloPassPipeline pipeline("GPU-ir-emit-prepare");
-  pipeline.AddInvariantChecker();
+  pipeline.AddInvariantChecker(shape_size_function);
   pipeline.AddPass();
   pipeline.AddPass(
       hlo_module->mutable_entry_computation_layout());
@@ -261,9 +264,11 @@ StatusOr> GpuCompiler::Compile(
     std::unique_ptr module, se::StreamExecutor* stream_exec) {
   TF_RET_CHECK(stream_exec != nullptr);
 
+  TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(),
+                                       stream_exec->GetDeviceDescription(),
+                                       ShapeSizeBytesFunction()));
   TF_RETURN_IF_ERROR(
-      OptimizeHloModule(module.get(), stream_exec->GetDeviceDescription()));
-  TF_RETURN_IF_ERROR(PrepareHloModuleForIrEmitting(module.get()));
+      PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction()));
 
   llvm::LLVMContext llvm_context;
   std::string buffer;
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 29ef237bc0..9ba2d54d02 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -28,6 +28,10 @@ namespace {
 // TODO(b/26024837): Check output shape for all instruction types.
 class ShapeVerifier : public DfsHloVisitor {
  public:
+  explicit ShapeVerifier(
+      const std::function& shape_size_fn)
+      : shape_size_fn_(shape_size_fn) {}
+
   Status HandleElementwiseUnary(HloInstruction* hlo,
                                 HloOpcode opcode) override {
     return CheckUnaryShape(hlo);
@@ -121,6 +125,9 @@ class ShapeVerifier : public DfsHloVisitor {
   }
 
   Status HandleBitcast(HloInstruction* bitcast) override {
+    // Bitcasts can be any shape, as long as the size matches the operand size.
+    TF_RET_CHECK(shape_size_fn_(bitcast->shape()) ==
+                 shape_size_fn_(bitcast->operand(0)->shape()));
     return tensorflow::Status::OK();
   }
 
@@ -270,13 +277,16 @@ class ShapeVerifier : public DfsHloVisitor {
                             instruction->opcode(), instruction->operands()));
     return CheckShape(instruction, expected);
   }
+
+  // Returns the size of a Shape in bytes.
+  const std::function shape_size_fn_;
 };
 
 }  // namespace
 
 StatusOr HloVerifier::Run(HloModule* module) {
   tensorflow::gtl::FlatMap instructions;
-  ShapeVerifier shape_verifier;
+  ShapeVerifier shape_verifier(shape_size_fn_);
 
   for (auto& computation : module->computations()) {
     for (const auto& instruction : computation->instructions()) {
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index 5159420b3f..bc6800dae5 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -24,12 +24,18 @@ namespace xla {
 // the module.
 class HloVerifier : public HloPassInterface {
  public:
+  explicit HloVerifier(const std::function& shape_size_fn)
+      : shape_size_fn_(shape_size_fn) {}
   ~HloVerifier() override = default;
   tensorflow::StringPiece name() const override { return "verifier"; }
 
   // Note: always returns false (no instructions are ever modified by this
   // pass).
   StatusOr Run(HloModule* module) override;
+
+ private:
+  // Returns the size of a Shape in bytes.
+  const std::function shape_size_fn_;
 };
 
 }  // namespace xla
-- 
GitLab


From e410fe82aafd5125ee0db4edc0a801ef0fea4a98 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 11:45:07 -0700
Subject: [PATCH 103/294] [XLA:CPU] One character code review (Update
 'all_bitcasts' variable correctly).

PiperOrigin-RevId: 167025837
---
 .../compiler/xla/service/cpu/cpu_parallelization_preparation.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_parallelization_preparation.cc b/tensorflow/compiler/xla/service/cpu/cpu_parallelization_preparation.cc
index 4d0e0f744a..20ee4f12e5 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_parallelization_preparation.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_parallelization_preparation.cc
@@ -70,7 +70,7 @@ StatusOr ParallelizationPreparation::Run(HloModule* module) {
     while (CanOutlineWithUser(outline_candidate)) {
       HloInstruction* prior_candidate = outline_candidate;
       outline_candidate = *outline_candidate->users().begin();
-      all_bitcasts |= outline_candidate->opcode() == HloOpcode::kBitcast;
+      all_bitcasts &= outline_candidate->opcode() == HloOpcode::kBitcast;
       if (std::any_of(outline_candidate->operands().begin(),
                       outline_candidate->operands().end(),
                       [&](const HloInstruction* operand) {
-- 
GitLab


From b3fbb9c81b4065d66829457b8f15ea7c02f30049 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 11:45:21 -0700
Subject: [PATCH 104/294] [XLA] Make some static functions in InstructionFusion
 members.

PiperOrigin-RevId: 167025880
---
 .../compiler/xla/service/instruction_fusion.cc      | 13 ++-----------
 .../compiler/xla/service/instruction_fusion.h       | 13 +++++++++++++
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index d449c637b5..265be54116 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -111,19 +111,11 @@ namespace xla {
   return false;
 }
 
-namespace {
-// Returns true if fusing producer into consumer would cause producer to be
-// duplicated. This is the case if producer has uses other than consumer.
-bool FusionWouldDuplicate(const HloInstruction& producer,
-                          const HloInstruction& consumer) {
-  return !(producer.users().size() == 1 && consumer.IsUserOf(&producer));
-}
-
 // An "effectively unary" operation is one that has one "large"
 // input with the others being negligible in terms of memory usage.
 // We use "has a smaller true rank than the output" as a heuristic
 // for "negligible" memory usage.
-bool EffectivelyUnary(HloInstruction* hlo) {
+bool InstructionFusion::EffectivelyUnary(HloInstruction* hlo) {
   int64 output_rank = 0;
   ShapeUtil::ForEachSubshape(
       hlo->shape(),
@@ -145,7 +137,6 @@ bool EffectivelyUnary(HloInstruction* hlo) {
                                 output_rank;
                        }) <= 1;
 }
-}  // namespace
 
 bool InstructionFusion::CanFuseOnAllPaths(
     const HloReachabilityMap& reachability_map, HloInstruction* producer,
@@ -243,7 +234,7 @@ StatusOr InstructionFusion::Run(HloModule* module) {
     DoNotFuseSet do_not_fuse;
     auto reachability = computation->ComputeReachability();
 
-    auto cheap_to_duplicate = [](HloInstruction* producer) {
+    auto cheap_to_duplicate = [this](HloInstruction* producer) {
       if (producer->opcode() == HloOpcode::kBroadcast) {
         return true;
       }
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h
index 3ac13ffda0..0eb8d03489 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/instruction_fusion.h
@@ -70,6 +70,19 @@ class InstructionFusion : public HloPassInterface {
   virtual HloInstruction* Fuse(HloInstruction* producer,
                                HloInstruction* consumer);
 
+  // An "effectively unary" operation is one that has one "large"
+  // input with the others being negligible in terms of memory usage.
+  // We use "has a smaller true rank than the output" as a heuristic
+  // for "negligible" memory usage.
+  bool EffectivelyUnary(HloInstruction* hlo);
+
+  // Returns true if fusing producer into consumer would cause producer to be
+  // duplicated. This is the case if producer has uses other than consumer.
+  bool FusionWouldDuplicate(const HloInstruction& producer,
+                            const HloInstruction& consumer) {
+    return !(producer.users().size() == 1 && consumer.IsUserOf(&producer));
+  }
+
   // Current HloComputation instance the loop fuser is traversing.
   HloComputation* computation_;
   HloModule* module_;
-- 
GitLab


From 3327f2078e7a062521cc5146d39f6a6d9be42769 Mon Sep 17 00:00:00 2001
From: Frank Chen 
Date: Wed, 30 Aug 2017 11:46:31 -0700
Subject: [PATCH 105/294] Add GCS file system support to TPU profiler so that
 we can write directly the collected profiles directly to GCS (since we are
 recommending that model checkpoints be written to GCS when using Cloud TPUs).

PiperOrigin-RevId: 167026061
---
 tensorflow/contrib/tpu/profiler/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD
index 0c860ad4d7..9157c79a90 100644
--- a/tensorflow/contrib/tpu/profiler/BUILD
+++ b/tensorflow/contrib/tpu/profiler/BUILD
@@ -28,6 +28,7 @@ cc_binary(
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/distributed_runtime/rpc:grpc_util",
+        "//tensorflow/core/platform/cloud:gcs_file_system",
         "@grpc//:grpc++_unsecure",
     ],
 )
-- 
GitLab


From e952f2566662fff98b2c34e22b8c8398f9b7d450 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 11:48:18 -0700
Subject: [PATCH 106/294] * Add GPU support for matrix_solve_ls by implement
 the default fast path a composite Python function that only relies on matmul,
 cholesky and triangular_solve, which all have GPU implementations. * Enable
 unit test for most linear algebra ops and their gradients on GPU. *
 Temporarily disable GPU test for matrix_solve gradient until a latent bug
 revealed by this change is fixed. * Add a benchmark for matrix_solve_ls.

The impact on CPU speed is generally positive except for a few regressions on tiny matrices. It is significantly faster (e.g. 50X faster for 1001x1001) for single large matrices, since most of the work is now performed in multi-threaded instead of single-threaded matmul.

Shape         Nrhs   Before              After               Speedup
(4,4)           1    6.60419464111e-05    6.103515625e-05     7.58%
(4,4)           2    6.29425048828e-05	  6.5803527832e-05   -4.55%
(4,4)           4    6.38961791992e-05	  7.29560852051e-05 -14.18%
(8,4)           1    6.19888305664e-05	  6.19888305664e-05   0.00%
(8,4)           2    6.103515625e-05	  6.38961791992e-05  -4.69%
(8,4)           4    5.98430633545e-05	  6.29425048828e-05  -5.18%
(4,8)           1    6.00814819336e-05	  6.00814819336e-05   0.00%
(4,8)           2    6.29425048828e-05	  6.19888305664e-05   1.52%
(4,8)           8    6.07967376709e-05	  6.29425048828e-05  -3.53%
(10,10)         1    6.22272491455e-05	  6.50882720947e-05  -4.60%
(10,10)         2    6.31809234619e-05	  6.48498535156e-05  -2.64%
(10,10)         10   6.19888305664e-05	  7.29560852051e-05 -17.69%
(10,8)          1    6.19888305664e-05	  8.29696655273e-05 -33.85%
(10,8)          2    7.29560852051e-05	  6.50882720947e-05  10.78%
(10,8)          8    6.19888305664e-05	  7.08103179932e-05 -14.23%
(8,10)          1    6.103515625e-05	  6.103515625e-05     0.00%
(8,10)          2    6.29425048828e-05	  6.103515625e-05     3.03%
(8,10)          10   7.41481781006e-05	  6.19888305664e-05  16.40%
(16,16)         1    7.48634338379e-05	  6.29425048828e-05  15.92%
(16,16)         2    7.5101852417e-05	  7.60555267334e-05  -1.27%
(16,16)         16   6.38961791992e-05	  7.67707824707e-05 -20.15%
(16,10)         1    8.48770141602e-05	  6.103515625e-05    28.09%
(16,10)         2    6.19888305664e-05	  6.19888305664e-05   0.00%
(16,10)         10   6.38961791992e-05	  6.29425048828e-05   1.49%
(10,16)         1    6.50882720947e-05	  6.19888305664e-05   4.76%
(10,16)         2    6.29425048828e-05	  6.103515625e-05     3.03%
(10,16)         16   7.60555267334e-05	  6.50882720947e-05  14.42%
(101,101)       1    0.000195980072021	  7.60555267334e-05  61.19%
(101,101)       2    0.000201940536499	  9.20295715332e-05  54.43%
(101,101)       101  0.000378847122192	  0.00026798248291   29.26%
(101,31)        1    8.51154327393e-05	  6.38961791992e-05  24.93%
(101,31)        2    8.20159912109e-05	  7.29560852051e-05  11.05%
(101,31)        31   9.70363616943e-05	  8.70227813721e-05  10.32%
(31,101)        1    9.58442687988e-05	  6.8187713623e-05   28.86%
(31,101)        2    0.000118017196655	  7.39097595215e-05  37.37%
(31,101)        101  0.000128984451294	  0.000111103057861  13.86%
(256,256)       1    0.00152015686035	  0.000142812728882  90.61%
(256,256)       2    0.00153708457947	  0.000188112258911  87.76%
(256,256)       256  0.00419092178345	  0.00194692611694   53.54%
(256,200)       1    0.000885009765625	  0.000108003616333  87.80%
(256,200)       2    0.000905990600586	  0.000153064727783  83.11%
(256,200)       200  0.00238180160522	  0.00109219551086   54.14%
(200,256)       1    0.000879049301147	  0.000108957290649  87.61%
(200,256)       2    0.000900030136108	  0.000159025192261  82.33%
(200,256)       256  0.00287699699402	  0.00136303901672   52.62%
(1001,1001)     1    0.0573220252991	  0.00101280212402   98.23%
(1001,1001)     2    0.0591979026794	  0.00128102302551   97.84%
(1001,1001)     1001 0.205045938492	  0.0915141105652    55.37%
(1001,501)      1    0.0129158496857	  0.000411987304688  96.81%
(1001,501)      2    0.0133857727051	  0.000498056411743  96.28%
(1001,501)      501  0.0418498516083	  0.0139379501343    66.70%
(501,1001)      1    0.012974023819	  0.000403165817261  96.89%
(501,1001)      2    0.0131361484528	  0.000482082366943  96.33%
(501,1001)      1001 0.0733780860901	  0.0280270576477    61.80%
(1024,1024)     1    0.0646319389343	  0.00120306015015   98.14%
(1024,1024)     2    0.0661849975586	  0.00151205062866   97.72%
(1024,1024)     1024 0.227252960205	  0.0971050262451    57.27%
(1024,128)      1    0.00104093551636	  9.60826873779e-05  90.77%
(1024,128)      2    0.00108408927917	  0.000156879425049  85.53%
(1024,128)      128  0.00251793861389	  0.000633955001831  74.82%
(128,1024)      1    0.00103306770325	  9.41753387451e-05  90.88%
(128,1024)      2    0.00111484527588	  0.000247955322266  77.76%
(128,1024)      1024 0.0141911506653	  0.00415301322937   70.74%
(2048,2048)     1    0.47181892395	  0.00541090965271   98.85%
(2048,2048)     2    0.477843999863	  0.00651216506958   98.64%
(2048,2048)     2048 1.75637602806	  0.759104013443     56.78%
(2048,64)       1    0.000587940216064	  9.08374786377e-05  84.55%
(2048,64)       2    0.000652074813843	  0.000128030776978  80.37%
(2048,64)       64   0.00130105018616	  0.000342130661011  73.70%
(64,2048)       1    0.000602006912231	  8.60691070557e-05  85.70%
(64,2048)       2    0.000680923461914	  0.000250816345215  63.17%
(64,2048)       2048 0.027850151062	  0.00638389587402   77.08%
(513,4,4)      1    0.000416040420532	  0.000288963317871  30.54%
(513,4,4)      2    0.000466823577881	  0.000313997268677  32.74%
(513,4,4)      4    0.000488996505737	  0.000338077545166  30.86%
(513,4,2)      1    0.000366926193237	  0.0002760887146    24.76%
(513,4,2)      2    0.000352144241333	  0.000269889831543  23.36%
(513,4,2)      2    0.000343084335327	  0.000263929367065  23.07%
(513,2,4)      1    0.000388145446777	  0.000275850296021  28.93%
(513,2,4)      2    0.000396013259888	  0.000270128250122  31.79%
(513,2,4)      4    0.00041389465332	  0.000282049179077  31.85%
(513,16,16)    1    0.000340938568115	  0.000346899032593  -1.75%
(513,16,16)    2    0.0003821849823	  0.000375986099243   1.62%
(513,16,16)    16   0.000468015670776	  0.000473022460938  -1.07%
(513,16,10)    1    0.000268936157227	  0.000272989273071  -1.51%
(513,16,10)    2    0.000308990478516	  0.000308036804199   0.31%
(513,16,10)    10   0.000367164611816	  0.000363111495972   1.10%
(513,10,16)    1    0.0002601146698	  0.000262975692749  -1.10%
(513,10,16)    2    0.000304937362671	  0.000303983688354   0.31%
(513,10,16)    16   0.00036883354187	  0.000371932983398  -0.84%
(513,256,256)  1    0.109347105026	  0.11101102829	     -1.52%
(513,256,256)  2    0.114288806915	  0.114917993546     -0.55%
(513,256,256)  256  0.353310108185	  0.348378896713      1.40%
(513,256,128)  1    0.0285301208496	  0.0287981033325    -0.94%
(513,256,128)  2    0.0313191413879	  0.0301601886749     3.70%
(513,256,128)  128  0.0773530006409	  0.0720520019531     6.85%
(513,128,256)  1    0.0281269550323	  0.0283789634705    -0.90%
(513,128,256)  2    0.0297160148621	  0.0298500061035    -0.45%
(513,128,256)  256  0.140498876572	  0.126143932343     10.22%

PiperOrigin-RevId: 167026381
---
 tensorflow/python/kernel_tests/BUILD          |   3 +
 .../python/kernel_tests/linalg_grad_test.py   |  41 +-
 .../kernel_tests/matrix_band_part_op_test.py  |   4 +-
 .../kernel_tests/matrix_solve_ls_op_test.py   | 442 ++++++++++++------
 .../kernel_tests/matrix_solve_op_test.py      |  23 +-
 tensorflow/python/ops/linalg_grad.py          |  51 +-
 tensorflow/python/ops/linalg_ops.py           | 130 +++++-
 7 files changed, 494 insertions(+), 200 deletions(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index bdd9a6dc22..4fa1e1fee8 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -522,9 +522,11 @@ tf_py_test(
     srcs = ["matrix_solve_ls_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:linalg_ops",
+        "//tensorflow/python:math_ops",
     ],
 )
 
@@ -534,6 +536,7 @@ tf_py_test(
     srcs = ["matrix_solve_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:linalg_ops",
diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py
index b40bcd7e13..7d367a9275 100644
--- a/tensorflow/python/kernel_tests/linalg_grad_test.py
+++ b/tensorflow/python/kernel_tests/linalg_grad_test.py
@@ -59,7 +59,7 @@ class MatrixUnaryFunctorGradientTest(test_lib.TestCase):
 def _GetMatrixUnaryFunctorGradientTest(functor_, dtype_, shape_, **kwargs_):
 
   def Test(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       np.random.seed(1)
       a_np = np.random.uniform(
           low=-1.0, high=1.0,
@@ -97,7 +97,11 @@ def _GetMatrixBinaryFunctorGradientTest(functor_,
                                         **kwargs_):
 
   def Test(self):
-    with self.test_session():
+    # TODO(rmlarsen): Debug illegal address bug on CUDA and re-enable
+    # GPU test for matrix_solve.
+    use_gpu = False if functor_ == linalg_ops.matrix_solve else True
+
+    with self.test_session(use_gpu=use_gpu):
       np.random.seed(1)
       a_np = np.random.uniform(
           low=-1.0, high=1.0,
@@ -142,26 +146,21 @@ if __name__ == '__main__':
           shape = extra + (size, size)
           name = '%s_%s_adj_%s' % (dtype.__name__, '_'.join(map(str, shape)),
                                    str(adjoint))
-          _AddTest(
-              MatrixBinaryFunctorGradientTest,
-              'MatrixSolveGradient',
-              name,
-              _GetMatrixBinaryFunctorGradientTest(
-                  linalg_ops.matrix_solve, dtype, shape, adjoint=adjoint))
+          _AddTest(MatrixBinaryFunctorGradientTest, 'MatrixSolveGradient', name,
+                   _GetMatrixBinaryFunctorGradientTest(
+                       linalg_ops.matrix_solve, dtype, shape, adjoint=adjoint))
 
           for lower in True, False:
             name = '%s_low_%s' % (name, lower)
-            _AddTest(
-                MatrixBinaryFunctorGradientTest,
-                'MatrixTriangularSolveGradient',
-                name,
-                _GetMatrixBinaryFunctorGradientTest(
-                    linalg_ops.matrix_triangular_solve,
-                    dtype,
-                    shape,
-                    float32_tol_fudge=4.0,
-                    adjoint=adjoint,
-                    lower=lower))
+            _AddTest(MatrixBinaryFunctorGradientTest,
+                     'MatrixTriangularSolveGradient', name,
+                     _GetMatrixBinaryFunctorGradientTest(
+                         linalg_ops.matrix_triangular_solve,
+                         dtype,
+                         shape,
+                         float32_tol_fudge=4.0,
+                         adjoint=adjoint,
+                         lower=lower))
 
   # Tests for gradients of unary matrix operations.
   for dtype in np.float32, np.float64:
@@ -191,8 +190,10 @@ if __name__ == '__main__':
               MatrixBinaryFunctorGradientTest,
               'MatrixSolveLsGradient',
               name,
+              # pylint: disable=long-lambda,g-long-lambda
               _GetMatrixBinaryFunctorGradientTest(
-                  lambda a, b, l=l2_regularization: linalg_ops.matrix_solve_ls(a, b, l),
+                  (lambda a, b, l=l2_regularization:
+                   linalg_ops.matrix_solve_ls(a, b, l)),
                   dtype,
                   shape,
                   float32_tol_fudge=4.0))
diff --git a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
index c2530b3597..e641d5511f 100644
--- a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
@@ -33,7 +33,7 @@ def _GetMatrixBandPartTest(dtype_, batch_shape_, shape_):
 
   def Test(self):
     mat = np.ones(shape_).astype(dtype_)
-    batch_mat = np.tile(mat, batch_shape + (1, 1))
+    batch_mat = np.tile(mat, batch_shape_ + (1, 1))
     with self.test_session(use_gpu=True):
       for lower in -1, 0, 1, shape_[-2] - 1:
         for upper in -1, 0, 1, shape_[-1] - 1:
@@ -42,7 +42,7 @@ def _GetMatrixBandPartTest(dtype_, batch_shape_, shape_):
             band_np = np.triu(band_np, -lower)
           if upper >= 0:
             band_np = np.tril(band_np, upper)
-          if batch_shape is not ():
+          if batch_shape_ is not ():
             band_np = np.tile(band_np, batch_shape + (1, 1))
           band = array_ops.matrix_band_part(batch_mat, lower, upper)
           self.assertAllEqual(band_np, band.eval())
diff --git a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
index ec4effec1b..de495968a7 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
@@ -20,158 +20,121 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.client import session
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
-from tensorflow.python.platform import test
-
-
-def BatchMatMul(a, b):
-  # A numpy implementation of tf.matmul().
-  if a.ndim < 3:
-    return np.dot(a, b)
-  # Get the number of matrices.
-  n = np.prod(a.shape[:-2])
-  assert n == np.prod(b.shape[:-2])
-  a_flat = np.reshape(a, tuple([n]) + a.shape[-2:])
-  b_flat = np.reshape(b, tuple([n]) + b.shape[-2:])
-  c_flat_shape = [n, a.shape[-2], b.shape[-1]]
-  c_flat = np.empty(c_flat_shape)
-  for i in range(n):
-    c_flat[i, :, :] = np.dot(a_flat[i, :, :], b_flat[i, :, :])
-  return np.reshape(c_flat, a.shape[:-1] + b_flat.shape[-1:])
-
-
-def BatchRegularizedLeastSquares(matrices, rhss, l2_regularization=0.0):
-  # A numpy implementation of regularized least squares solver using
-  # the normal equations.
-  matrix_dims = matrices.shape
-  matrices_transposed = np.swapaxes(matrices, -2, -1)
-  rows = matrix_dims[-2]
-  cols = matrix_dims[-1]
-  if rows >= cols:
-    preconditioner = l2_regularization * np.identity(cols)
-    gramian = BatchMatMul(matrices_transposed, matrices) + preconditioner
-    inverse = np.linalg.inv(gramian)
-    left_pseudo_inverse = BatchMatMul(inverse, matrices_transposed)
-    return BatchMatMul(left_pseudo_inverse, rhss)
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test as test_lib
+
+
+def _AddTest(test, op_name, testcase_name, fn):
+  test_name = "_".join(["test", op_name, testcase_name])
+  if hasattr(test, test_name):
+    raise RuntimeError("Test %s defined more than once" % test_name)
+  setattr(test, test_name, fn)
+
+
+def _GenerateTestData(matrix_shape, num_rhs):
+  batch_shape = matrix_shape[:-2]
+  matrix_shape = matrix_shape[-2:]
+  m = matrix_shape[-2]
+  np.random.seed(1)
+  matrix = np.random.uniform(
+      low=-1.0, high=1.0,
+      size=np.prod(matrix_shape)).reshape(matrix_shape).astype(np.float32)
+  rhs = np.ones([m, num_rhs]).astype(np.float32)
+  matrix = variables.Variable(
+      np.tile(matrix, batch_shape + (1, 1)), trainable=False)
+  rhs = variables.Variable(np.tile(rhs, batch_shape + (1, 1)), trainable=False)
+  return matrix, rhs
+
+
+def _SolveWithNumpy(matrix, rhs, l2_regularizer=0):
+  if l2_regularizer == 0:
+    np_ans, _, _, _ = np.linalg.lstsq(matrix, rhs)
+    return np_ans
   else:
-    preconditioner = l2_regularization * np.identity(rows)
-    gramian = BatchMatMul(matrices, matrices_transposed) + preconditioner
-    inverse = np.linalg.inv(gramian)
-    right_pseudo_inverse = BatchMatMul(matrices_transposed, inverse)
-    return BatchMatMul(right_pseudo_inverse, rhss)
+    rows = matrix.shape[-2]
+    cols = matrix.shape[-1]
+    if rows >= cols:
+      preconditioner = l2_regularizer * np.identity(cols)
+      gramian = np.dot(np.conj(matrix.T), matrix) + preconditioner
+      rhs = np.dot(np.conj(matrix.T), rhs)
+      return np.linalg.solve(gramian, rhs)
+    else:
+      preconditioner = l2_regularizer * np.identity(rows)
+      gramian = np.dot(matrix, np.conj(matrix.T)) + preconditioner
+      z = np.linalg.solve(gramian, rhs)
+      return np.dot(np.conj(matrix.T), z)
 
 
-class MatrixSolveLsOpTest(test.TestCase):
+class MatrixSolveLsOpTest(test_lib.TestCase):
 
-  def _verifySolve(self, x, y):
-    for np_type in [np.float32, np.float64, np.complex64, np.complex128]:
-      a = x.astype(np_type)
-      b = y.astype(np_type)
-      if np_type in [np.complex64, np.complex128]:
+  def _verifySolve(self,
+                   x,
+                   y,
+                   dtype,
+                   use_placeholder,
+                   fast,
+                   l2_regularizer,
+                   batch_shape=()):
+    if not fast and l2_regularizer != 0:
+      # The slow path does not support regularization.
+      return
+    maxdim = np.max(x.shape)
+    if dtype == np.float32 or dtype == np.complex64:
+      tol = maxdim * 5e-4
+    else:
+      tol = maxdim * 5e-7
+      a = x.astype(dtype)
+      b = y.astype(dtype)
+      if dtype in [np.complex64, np.complex128]:
         a.imag = a.real
         b.imag = b.real
-      np_ans, _, _, _ = np.linalg.lstsq(a, b)
-      for fast in [True, False]:
-        with self.test_session():
-          tf_ans = linalg_ops.matrix_solve_ls(a, b, fast=fast)
-          ans = tf_ans.eval()
-        self.assertEqual(np_ans.shape, tf_ans.get_shape())
-        self.assertEqual(np_ans.shape, ans.shape)
-
-        # Check residual norm.
-        tf_r = b - BatchMatMul(a, ans)
-        tf_r_norm = np.sum(tf_r * tf_r)
-        np_r = b - BatchMatMul(a, np_ans)
-        np_r_norm = np.sum(np_r * np_r)
-        self.assertAllClose(np_r_norm, tf_r_norm)
-
-        # Check solution.
-        if np_type == np.float32 or np_type == np.complex64:
-          tol = 5e-5
+      # numpy.linalg.lstqr does not batching, so we just solve a single system
+      # and replicate the solution. and residual norm.
+      np_ans = _SolveWithNumpy(x, y, l2_regularizer=l2_regularizer)
+      np_r = np.dot(np.conj(a.T), b - np.dot(a, np_ans))
+      np_r_norm = np.sqrt(np.sum(np.conj(np_r) * np_r))
+      if batch_shape is not ():
+        a = np.tile(a, batch_shape + (1, 1))
+        b = np.tile(b, batch_shape + (1, 1))
+        np_ans = np.tile(np_ans, batch_shape + (1, 1))
+        np_r_norm = np.tile(np_r_norm, batch_shape)
+      with self.test_session(use_gpu=fast) as sess:
+        if use_placeholder:
+          a_ph = array_ops.placeholder(dtypes.as_dtype(dtype))
+          b_ph = array_ops.placeholder(dtypes.as_dtype(dtype))
+          feed_dict = {a_ph: a, b_ph: b}
+          tf_ans = linalg_ops.matrix_solve_ls(
+              a_ph, b_ph, fast=fast, l2_regularizer=l2_regularizer)
         else:
-          tol = 1e-12
-        self.assertAllClose(np_ans, ans, atol=tol, rtol=tol)
-
-  def _verifySolveBatch(self, x, y):
-    # Since numpy.linalg.lsqr does not support batch solves, as opposed
-    # to numpy.linalg.solve, we just perform this test for a fixed batch size
-    # of 2x3.
-    for np_type in [np.float32, np.float64]:
-      a = np.tile(x.astype(np_type), [2, 3, 1, 1])
-      b = np.tile(y.astype(np_type), [2, 3, 1, 1])
-      np_ans = np.empty([2, 3, a.shape[-1], b.shape[-1]])
-      for dim1 in range(2):
-        for dim2 in range(3):
-          np_ans[dim1, dim2, :, :], _, _, _ = np.linalg.lstsq(
-              a[dim1, dim2, :, :], b[dim1, dim2, :, :])
-      for fast in [True, False]:
-        with self.test_session():
-          tf_ans = linalg_ops.matrix_solve_ls(a, b, fast=fast).eval()
-        self.assertEqual(np_ans.shape, tf_ans.shape)
-        # Check residual norm.
-        tf_r = b - BatchMatMul(a, tf_ans)
-        tf_r_norm = np.sum(tf_r * tf_r)
-        np_r = b - BatchMatMul(a, np_ans)
-        np_r_norm = np.sum(np_r * np_r)
-        self.assertAllClose(np_r_norm, tf_r_norm)
-        # Check solution.
-        if fast or a.shape[-2] >= a.shape[-1]:
-          # We skip this test for the underdetermined case when using the
-          # slow path, because Eigen does not return a minimum norm solution.
-          # TODO(rmlarsen): Enable this check for all paths if/when we fix
-          # Eigen's solver.
-          self.assertAllClose(np_ans, tf_ans, atol=1e-5, rtol=1e-5)
-
-  def _verifyRegularized(self, x, y, l2_regularizer):
-    for np_type in [np.float32, np.float64]:
-      # Test with a single matrix.
-      a = x.astype(np_type)
-      b = y.astype(np_type)
-      np_ans = BatchRegularizedLeastSquares(a, b, l2_regularizer)
-      with self.test_session():
-        # Test matrix_solve_ls on regular matrices
-        tf_ans = linalg_ops.matrix_solve_ls(
-            a, b, l2_regularizer=l2_regularizer, fast=True).eval()
-        self.assertAllClose(np_ans, tf_ans, atol=1e-5, rtol=1e-5)
-
-      # Test with a 2x3 batch of matrices.
-      a = np.tile(x.astype(np_type), [2, 3, 1, 1])
-      b = np.tile(y.astype(np_type), [2, 3, 1, 1])
-      np_ans = BatchRegularizedLeastSquares(a, b, l2_regularizer)
-      with self.test_session():
-        tf_ans = linalg_ops.matrix_solve_ls(
-            a, b, l2_regularizer=l2_regularizer, fast=True).eval()
-      self.assertAllClose(np_ans, tf_ans, atol=1e-5, rtol=1e-5)
-
-  def testSquare(self):
-    # 2x2 matrices, 2x3 right-hand sides.
-
-    matrix = np.array([[1., 2.], [3., 4.]])
-    rhs = np.array([[1., 0., 1.], [0., 1., 1.]])
-    self._verifySolve(matrix, rhs)
-    self._verifySolveBatch(matrix, rhs)
-    self._verifyRegularized(matrix, rhs, l2_regularizer=0.1)
-
-  def testOverdetermined(self):
-    # 2x2 matrices, 2x3 right-hand sides.
-    matrix = np.array([[1., 2.], [3., 4.], [5., 6.]])
-    rhs = np.array([[1., 0., 1.], [0., 1., 1.], [1., 1., 0.]])
-    self._verifySolve(matrix, rhs)
-    self._verifySolveBatch(matrix, rhs)
-    self._verifyRegularized(matrix, rhs, l2_regularizer=0.1)
+          tf_ans = linalg_ops.matrix_solve_ls(
+              a, b, fast=fast, l2_regularizer=l2_regularizer)
+          feed_dict = {}
+          self.assertEqual(np_ans.shape, tf_ans.get_shape())
+        if l2_regularizer == 0:
+          # The least squares solution should satisfy A^H * (b - A*x) = 0.
+          tf_r = b - math_ops.matmul(a, tf_ans)
+          tf_r = math_ops.matmul(a, tf_r, adjoint_a=True)
+          tf_r_norm = linalg_ops.norm(tf_r, ord="fro", axis=[-2, -1])
+          tf_ans_val, tf_r_norm_val = sess.run(
+              [tf_ans, tf_r_norm], feed_dict=feed_dict)
+          self.assertAllClose(np_r_norm, tf_r_norm_val, atol=tol, rtol=tol)
+        else:
+          tf_ans_val = sess.run(tf_ans, feed_dict=feed_dict)
 
-  def testUnderdetermined(self):
-    # 2x2 matrices, 2x3 right-hand sides.
-    matrix = np.array([[1., 2., 3], [4., 5., 6.]])
-    rhs = np.array([[1., 0., 1.], [0., 1., 1.]])
-    self._verifySolve(matrix, rhs)
-    self._verifySolveBatch(matrix, rhs)
-    self._verifyRegularized(matrix, rhs, l2_regularizer=0.1)
+      self.assertEqual(np_ans.shape, tf_ans_val.shape)
+      self.assertAllClose(np_ans, tf_ans_val, atol=2 * tol, rtol=2 * tol)
 
   def testWrongDimensions(self):
     # The matrix and right-hand sides should have the same number of rows.
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       matrix = constant_op.constant([[1., 0.], [0., 1.]])
       rhs = constant_op.constant([[1., 0.]])
       with self.assertRaises(ValueError):
@@ -182,7 +145,7 @@ class MatrixSolveLsOpTest(test.TestCase):
     empty0 = np.empty([3, 0])
     empty1 = np.empty([0, 2])
     for fast in [True, False]:
-      with self.test_session():
+      with self.test_session(use_gpu=True):
         tf_ans = linalg_ops.matrix_solve_ls(empty0, empty0, fast=fast).eval()
         self.assertEqual(tf_ans.shape, (0, 0))
         tf_ans = linalg_ops.matrix_solve_ls(empty0, full, fast=fast).eval()
@@ -202,5 +165,202 @@ class MatrixSolveLsOpTest(test.TestCase):
     self.assertEqual(answer.get_shape(), [3, 3, 1])
 
 
+def _GetSmallMatrixSolveLsOpTests(dtype, use_placeholder, fast, l2_regularizer):
+
+  def Square(self):
+    # 2x2 matrices, 2x3 right-hand sides.
+    matrix = np.array([[1., 2.], [3., 4.]])
+    rhs = np.array([[1., 0., 1.], [0., 1., 1.]])
+    for batch_shape in (), (2, 3):
+      self._verifySolve(
+          matrix,
+          rhs,
+          dtype,
+          use_placeholder,
+          fast,
+          l2_regularizer,
+          batch_shape=batch_shape)
+
+  def Overdetermined(self):
+    # 2x2 matrices, 2x3 right-hand sides.
+    matrix = np.array([[1., 2.], [3., 4.], [5., 6.]])
+    rhs = np.array([[1., 0., 1.], [0., 1., 1.], [1., 1., 0.]])
+    for batch_shape in (), (2, 3):
+      self._verifySolve(
+          matrix,
+          rhs,
+          dtype,
+          use_placeholder,
+          fast,
+          l2_regularizer,
+          batch_shape=batch_shape)
+
+  def Underdetermined(self):
+    # 2x2 matrices, 2x3 right-hand sides.
+    matrix = np.array([[1., 2., 3], [4., 5., 6.]])
+    rhs = np.array([[1., 0., 1.], [0., 1., 1.]])
+    for batch_shape in (), (2, 3):
+      self._verifySolve(
+          matrix,
+          rhs,
+          dtype,
+          use_placeholder,
+          fast,
+          l2_regularizer,
+          batch_shape=batch_shape)
+
+  return (Square, Overdetermined, Underdetermined)
+
+
+def _GetLargeMatrixSolveLsOpTests(dtype, use_placeholder, fast, l2_regularizer):
+
+  def LargeBatchSquare(self):
+    np.random.seed(1)
+    num_rhs = 1
+    matrix_shape = (127, 127)
+    matrix = np.random.uniform(
+        low=-1.0, high=1.0,
+        size=np.prod(matrix_shape)).reshape(matrix_shape).astype(np.float32)
+    rhs = np.ones([matrix_shape[0], num_rhs]).astype(np.float32)
+    self._verifySolve(
+        matrix,
+        rhs,
+        dtype,
+        use_placeholder,
+        fast,
+        l2_regularizer,
+        batch_shape=(16, 8))
+
+  def LargeBatchOverdetermined(self):
+    np.random.seed(1)
+    num_rhs = 1
+    matrix_shape = (127, 64)
+    matrix = np.random.uniform(
+        low=-1.0, high=1.0,
+        size=np.prod(matrix_shape)).reshape(matrix_shape).astype(np.float32)
+    rhs = np.ones([matrix_shape[0], num_rhs]).astype(np.float32)
+    self._verifySolve(
+        matrix,
+        rhs,
+        dtype,
+        use_placeholder,
+        fast,
+        l2_regularizer,
+        batch_shape=(16, 8))
+
+  def LargeBatchUnderdetermined(self):
+    np.random.seed(1)
+    num_rhs = 1
+    matrix_shape = (64, 127)
+    matrix = np.random.uniform(
+        low=-1.0, high=1.0,
+        size=np.prod(matrix_shape)).reshape(matrix_shape).astype(np.float32)
+    rhs = np.ones([matrix_shape[0], num_rhs]).astype(np.float32)
+    self._verifySolve(
+        matrix,
+        rhs,
+        dtype,
+        use_placeholder,
+        fast,
+        l2_regularizer,
+        batch_shape=(16, 8))
+
+  return (LargeBatchSquare, LargeBatchOverdetermined, LargeBatchUnderdetermined)
+
+
+class MatrixSolveLsBenchmark(test_lib.Benchmark):
+
+  matrix_shapes = [
+      (4, 4),
+      (8, 4),
+      (4, 8),
+      (10, 10),
+      (10, 8),
+      (8, 10),
+      (16, 16),
+      (16, 10),
+      (10, 16),
+      (101, 101),
+      (101, 31),
+      (31, 101),
+      (256, 256),
+      (256, 200),
+      (200, 256),
+      (1001, 1001),
+      (1001, 501),
+      (501, 1001),
+      (1024, 1024),
+      (1024, 128),
+      (128, 1024),
+      (2048, 2048),
+      (2048, 64),
+      (64, 2048),
+      (513, 4, 4),
+      (513, 4, 2),
+      (513, 2, 4),
+      (513, 16, 16),
+      (513, 16, 10),
+      (513, 10, 16),
+      (513, 256, 256),
+      (513, 256, 128),
+      (513, 128, 256),
+  ]
+
+  def benchmarkMatrixSolveLsOp(self):
+    run_gpu_test = test_lib.is_gpu_available(True)
+    regularizer = 1.0
+    for matrix_shape in self.matrix_shapes:
+      for num_rhs in 1, 2, matrix_shape[-1]:
+
+        with ops.Graph().as_default(), \
+            session.Session() as sess, \
+            ops.device("/cpu:0"):
+          matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
+          x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
+          variables.global_variables_initializer().run()
+          self.run_op_benchmark(
+              sess,
+              control_flow_ops.group(x),
+              min_iters=25,
+              store_memory_usage=False,
+              name=("matrix_solve_ls_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}"
+                   ).format(matrix_shape=matrix_shape, num_rhs=num_rhs))
+
+        if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513):
+          with ops.Graph().as_default(), \
+                session.Session() as sess, \
+                ops.device("/gpu:0"):
+            matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
+            x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
+            variables.global_variables_initializer().run()
+            self.run_op_benchmark(
+                sess,
+                control_flow_ops.group(x),
+                min_iters=25,
+                store_memory_usage=False,
+                name=("matrix_solve_ls_gpu_shape_{matrix_shape}_num_rhs_"
+                      "{num_rhs}").format(
+                          matrix_shape=matrix_shape, num_rhs=num_rhs))
+
+
 if __name__ == "__main__":
-  test.main()
+  for dtype_ in [np.float32, np.float64, np.complex64, np.complex128]:
+    for use_placeholder_ in [True, False]:
+      for fast_ in [True, False]:
+        l2_regularizers = [0] if dtype_ == np.complex128 else [0, 0.1]
+        for l2_regularizer_ in l2_regularizers:
+          for test_case in _GetSmallMatrixSolveLsOpTests(
+              dtype_, use_placeholder_, fast_, l2_regularizer_):
+            name = "%s_%s_placeholder_%s_fast_%s_regu_%s" % (test_case.__name__,
+                                                             dtype_.__name__,
+                                                             use_placeholder_,
+                                                             fast_,
+                                                             l2_regularizer_)
+            _AddTest(MatrixSolveLsOpTest, "MatrixSolveLsOpTest", name,
+                     test_case)
+  for dtype_ in [np.float32, np.float64, np.complex64, np.complex128]:
+    for test_case in _GetLargeMatrixSolveLsOpTests(dtype_, False, True, 0.0):
+      name = "%s_%s" % (test_case.__name__, dtype_.__name__)
+      _AddTest(MatrixSolveLsOpTest, "MatrixSolveLsOpTest", name, test_case)
+
+  test_lib.main()
diff --git a/tensorflow/python/kernel_tests/matrix_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
index 4a1f3591f1..9699359538 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_op_test.py
@@ -22,7 +22,9 @@ import numpy as np
 
 from tensorflow.python.client import session
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import variables
@@ -44,18 +46,25 @@ class MatrixSolveOpTest(test.TestCase):
         else:
           a = x.astype(np_type)
           b = y.astype(np_type)
-        a_np = np.conj(np.transpose(a)) if adjoint else a
+          a_np = np.conj(np.transpose(a)) if adjoint else a
         if batch_dims is not None:
           a = np.tile(a, batch_dims + [1, 1])
           a_np = np.tile(a_np, batch_dims + [1, 1])
           b = np.tile(b, batch_dims + [1, 1])
         np_ans = np.linalg.solve(a_np, b)
-        with self.test_session(use_gpu=True):
-          tf_ans = linalg_ops.matrix_solve(a, b, adjoint=adjoint)
-          out = tf_ans.eval()
-          self.assertEqual(tf_ans.get_shape(), out.shape)
-          self.assertEqual(np_ans.shape, out.shape)
-          self.assertAllClose(np_ans, out, atol=tol, rtol=tol)
+        for use_placeholder in False, True:
+          with self.test_session(use_gpu=True) as sess:
+            if use_placeholder:
+              a_ph = array_ops.placeholder(dtypes.as_dtype(np_type))
+              b_ph = array_ops.placeholder(dtypes.as_dtype(np_type))
+              tf_ans = linalg_ops.matrix_solve(a_ph, b_ph, adjoint=adjoint)
+              out = sess.run(tf_ans, {a_ph: a, b_ph: b})
+            else:
+              tf_ans = linalg_ops.matrix_solve(a, b, adjoint=adjoint)
+              out = tf_ans.eval()
+              self.assertEqual(tf_ans.get_shape(), out.shape)
+            self.assertEqual(np_ans.shape, out.shape)
+            self.assertAllClose(np_ans, out, atol=tol, rtol=tol)
 
   def _generateMatrix(self, m, n):
     matrix = (np.random.normal(-5, 5,
diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py
index 460bf3f3f8..b7ae5665f1 100644
--- a/tensorflow/python/ops/linalg_grad.py
+++ b/tensorflow/python/ops/linalg_grad.py
@@ -39,8 +39,7 @@ def _MatrixInverseGrad(op, grad):
   """Gradient for MatrixInverse."""
   ainv = op.outputs[0]
   return -math_ops.matmul(
-      ainv, math_ops.matmul(
-          grad, ainv, adjoint_b=True), adjoint_a=True)
+      ainv, math_ops.matmul(grad, ainv, adjoint_b=True), adjoint_a=True)
 
 
 @ops.RegisterGradient("MatrixDeterminant")
@@ -49,8 +48,9 @@ def _MatrixDeterminantGrad(op, grad):
   a = op.inputs[0]
   c = op.outputs[0]
   a_adj_inv = linalg_ops.matrix_inverse(a, adjoint=True)
-  multipliers = array_ops.reshape(
-      grad * c, array_ops.concat([array_ops.shape(c), [1, 1]], 0))
+  multipliers = array_ops.reshape(grad * c,
+                                  array_ops.concat([array_ops.shape(c), [1, 1]],
+                                                   0))
   return multipliers * a_adj_inv
 
 
@@ -62,8 +62,11 @@ def _CholeskyGrad(op, grad):
   l = op.outputs[0]
   num_rows = array_ops.shape(l)[-1]
   batch_shape = array_ops.shape(l)[:-2]
-  l_inverse = linalg_ops.matrix_triangular_solve(
-      l, linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=l.dtype))
+  l_inverse = linalg_ops.matrix_triangular_solve(l,
+                                                 linalg_ops.eye(
+                                                     num_rows,
+                                                     batch_shape=batch_shape,
+                                                     dtype=l.dtype))
 
   middle = math_ops.matmul(l, grad, adjoint_a=True)
   middle = array_ops.matrix_set_diag(middle,
@@ -112,15 +115,12 @@ def _MatrixSolveLsGrad(op, grad):
     """
     a = op.inputs[0]
     b = op.inputs[1]
-    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
     x = op.outputs[0]
-    a_shape = array_ops.shape(a)
-    batch_shape = a_shape[:-2]
-    n = a_shape[-1]
-
-    identity = linalg_ops.eye(n, batch_shape=batch_shape, dtype=a.dtype)
-    gramian = math_ops.matmul(a, a, adjoint_a=True) + l2_regularizer * identity
-    chol = linalg_ops.cholesky(gramian)
+    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
+    # pylint: disable=protected-access
+    chol = linalg_ops._RegularizedGramianCholesky(
+        a, l2_regularizer=l2_regularizer, first_kind=True)
+    # pylint: enable=protected-access
     # Temporary z = (A^T * A + lambda * I)^{-1} * grad.
     z = linalg_ops.cholesky_solve(chol, grad)
     xzt = math_ops.matmul(x, z, adjoint_b=True)
@@ -141,13 +141,10 @@ def _MatrixSolveLsGrad(op, grad):
     a = op.inputs[0]
     b = op.inputs[1]
     l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
-    a_shape = array_ops.shape(a)
-    batch_shape = a_shape[:-2]
-    m = a_shape[-2]
-
-    identity = linalg_ops.eye(m, batch_shape=batch_shape, dtype=a.dtype)
-    gramian = math_ops.matmul(a, a, adjoint_b=True) + l2_regularizer * identity
-    chol = linalg_ops.cholesky(gramian)
+    # pylint: disable=protected-access
+    chol = linalg_ops._RegularizedGramianCholesky(
+        a, l2_regularizer=l2_regularizer, fist_kind=False)
+    # pylint: enable=protected-access
     grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad))
     # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
     tmp = linalg_ops.cholesky_solve(chol, b)
@@ -219,15 +216,17 @@ def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
       grad_a = math_ops.matmul(
           v,
           math_ops.matmul(
-              array_ops.matrix_diag(grad_e) + f * math_ops.matmul(
-                  v, grad_v, adjoint_a=True),
+              array_ops.matrix_diag(grad_e) +
+              f * math_ops.matmul(v, grad_v, adjoint_a=True),
               v,
               adjoint_b=True))
     else:
       _, v = linalg_ops.self_adjoint_eig(op.inputs[0])
-      grad_a = math_ops.matmul(
-          v, math_ops.matmul(
-              array_ops.matrix_diag(grad_e), v, adjoint_b=True))
+      grad_a = math_ops.matmul(v,
+                               math_ops.matmul(
+                                   array_ops.matrix_diag(grad_e),
+                                   v,
+                                   adjoint_b=True))
     # The forward op only depends on the lower triangular part of a, so here we
     # symmetrize and take the lower triangle
     grad_a = array_ops.matrix_band_part(
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index 34fe3a1241..2b11835d85 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -23,6 +23,7 @@ import numpy as np
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_linalg_ops
 from tensorflow.python.ops import math_ops
 # pylint: disable=wildcard-import
@@ -34,6 +35,47 @@ from tensorflow.python.util import compat
 # pylint: disable=invalid-name
 
 
+def _RegularizedGramianCholesky(matrix, l2_regularizer, first_kind):
+  r"""Computes Cholesky factorization of regularized gramian matrix.
+
+  Below we will use the following notation for each pair of matrix and
+  right-hand sides in the batch:
+
+  `matrix`=\\(A \in \Re^{m \times n}\\),
+  `output`=\\(C  \in \Re^{\min(m, n) \times \min(m,n)}\\),
+  `l2_regularizer`=\\(\lambda\\).
+
+  If `first_kind` is True, returns the Cholesky factorization \\(L\\) such that
+  \\(L L^H =  A^H A + \lambda I\\).
+  If `first_kind` is False, returns the Cholesky factorization \\(L\\) such that
+  \\(L L^H =  A A^H + \lambda I\\).
+
+  Args:
+    matrix: `Tensor` of shape `[..., M, N]`.
+    l2_regularizer: 0-D `double` `Tensor`. Ignored if `fast=False`.
+    first_kind: bool. Controls what gramian matrix to factor.
+  Returns:
+    output: `Tensor` of shape `[..., min(M,N), min(M,N)]` whose inner-most 2
+      dimensions contain the Cholesky factors \\(L\\) described above.
+  """
+
+  gramian = math_ops.matmul(
+      matrix, matrix, adjoint_a=first_kind, adjoint_b=not first_kind)
+  if isinstance(l2_regularizer, ops.Tensor) or l2_regularizer != 0:
+    matrix_shape = array_ops.shape(matrix)
+    batch_shape = matrix_shape[:-2]
+    if first_kind:
+      small_dim = matrix_shape[-1]
+    else:
+      small_dim = matrix_shape[-2]
+    identity = eye(small_dim, batch_shape=batch_shape, dtype=matrix.dtype)
+    small_dim_static = matrix.shape[-1 if first_kind else -2]
+    identity.set_shape(
+        matrix.shape[:-2].concatenate([small_dim_static, small_dim_static]))
+    gramian += l2_regularizer * identity
+  return gen_linalg_ops.cholesky(gramian)
+
+
 def cholesky_solve(chol, rhs, name=None):
   """Solves systems of linear eqns `A X = RHS`, given Cholesky factorizations.
 
@@ -195,10 +237,90 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
       `M`-by-`K` matrices that solve the equations
       `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]` in the least
       squares sense.
+
+  Raises:
+    NotImplementedError: matrix_solve_ls is currently disabled for complex128
+    and l2_regularizer != 0 due to poor accuracy.
   """
-  # pylint: disable=protected-access
-  return gen_linalg_ops._matrix_solve_ls(
-      matrix, rhs, l2_regularizer, fast=fast, name=name)
+
+  # pylint: disable=protected-access,long-lambda
+  def _use_composite_impl(fast, tensor_shape):
+    """Determines whether to use the composite or specialized CPU kernel.
+
+    When the total size of the tensor is larger than the cache size and the
+    batch size is large compared to the smallest matrix dimension, then the
+    composite implementation is inefficient since it has to read the entire
+    tensor from memory multiple times. In this case we fall back to the
+    original CPU kernel, which does all the computational steps on each
+    matrix separately.
+
+    Only fast mode is supported by the composite impl, so `False` is returned
+    if `fast` is `False`.
+
+    Args:
+      fast: bool indicating if fast mode in the solver was requested.
+      tensor_shape: The shape of the tensor.
+
+    Returns:
+      True if the composite impl should be used. False otherwise.
+    """
+    if fast is False:
+      return False
+    batch_shape = tensor_shape[:-2]
+    matrix_shape = tensor_shape[-2:]
+    if not tensor_shape.is_fully_defined():
+      return True
+    tensor_size = tensor_shape.num_elements() * matrix.dtype.size
+    is_io_bound = batch_shape.num_elements() > np.min(matrix_shape)
+    L2_CACHE_SIZE_GUESSTIMATE = 256000
+    if tensor_size > L2_CACHE_SIZE_GUESSTIMATE and is_io_bound:
+      return False
+    else:
+      return True
+
+  def _overdetermined(matrix, rhs, l2_regularizer):
+    """Computes (A^H*A + l2_regularizer)^{-1} * A^H * rhs."""
+    chol = _RegularizedGramianCholesky(
+        matrix, l2_regularizer=l2_regularizer, first_kind=True)
+    return cholesky_solve(chol, math_ops.matmul(matrix, rhs, adjoint_a=True))
+
+  def _underdetermined(matrix, rhs, l2_regularizer):
+    """Computes A^H * (A*A^H + l2_regularizer)^{-1} * rhs."""
+    chol = _RegularizedGramianCholesky(
+        matrix, l2_regularizer=l2_regularizer, first_kind=False)
+    return math_ops.matmul(matrix, cholesky_solve(chol, rhs), adjoint_a=True)
+
+  def _composite_impl(matrix, rhs, l2_regularizer):
+    """Composite implementation of matrix_solve_ls that supports GPU."""
+    with ops.name_scope(name, 'matrix_solve_ls', [matrix, rhs, l2_regularizer]):
+      matrix_shape = matrix.get_shape()[-2:]
+      if matrix_shape.is_fully_defined():
+        if matrix_shape[-2] >= matrix_shape[-1]:
+          return _overdetermined(matrix, rhs, l2_regularizer)
+        else:
+          return _underdetermined(matrix, rhs, l2_regularizer)
+      else:
+        # We have to defer determining the shape to runtime and use
+        # conditional execution of the appropriate graph.
+        matrix_shape = array_ops.shape(matrix)[-2:]
+        return control_flow_ops.cond(
+            matrix_shape[-2] >= matrix_shape[-1],
+            lambda: _overdetermined(matrix, rhs, l2_regularizer),
+            lambda: _underdetermined(matrix, rhs, l2_regularizer))
+
+  matrix = ops.convert_to_tensor(matrix, name='matrix')
+  if matrix.dtype == dtypes.complex128 and l2_regularizer != 0:
+    # TODO(rmlarsen): Investigate and fix accuracy bug.
+    raise NotImplementedError('matrix_solve_ls is currently disabled for '
+                              'complex128 and l2_regularizer != 0 due to '
+                              'poor accuracy.')
+  tensor_shape = matrix.get_shape()
+  if _use_composite_impl(fast, tensor_shape):
+    return _composite_impl(matrix, rhs, l2_regularizer)
+  else:
+    return gen_linalg_ops._matrix_solve_ls(
+        matrix, rhs, l2_regularizer, fast=fast, name=name)
+  # pylint: enable=protected-access
 
 
 def self_adjoint_eig(tensor, name=None):
@@ -291,7 +413,7 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None):
   """
   # pylint: disable=protected-access
   s, u, v = gen_linalg_ops._svd(
-      tensor, compute_uv=compute_uv, full_matrices=full_matrices)
+      tensor, compute_uv=compute_uv, full_matrices=full_matrices, name=name)
   # pylint: enable=protected-access
   if compute_uv:
     return math_ops.real(s), u, v
-- 
GitLab


From 8f1746c4b37441e5d6a080cd3c871bde913e9564 Mon Sep 17 00:00:00 2001
From: James Qin 
Date: Wed, 30 Aug 2017 12:19:30 -0700
Subject: [PATCH 107/294] Exposing CudnnCompatibleRNN classes.

Also add reuse arg to LSTMBlockCell.

PiperOrigin-RevId: 167030950
---
 tensorflow/contrib/cudnn_rnn/__init__.py                 | 7 +++++++
 tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py | 5 +++--
 tensorflow/contrib/rnn/python/ops/lstm_ops.py            | 7 ++++++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/cudnn_rnn/__init__.py b/tensorflow/contrib/cudnn_rnn/__init__.py
index 470661a9b1..87ba834770 100644
--- a/tensorflow/contrib/cudnn_rnn/__init__.py
+++ b/tensorflow/contrib/cudnn_rnn/__init__.py
@@ -14,6 +14,8 @@
 # ==============================================================================
 """Ops for fused Cudnn RNN models.
 
+@@CudnnCompatibleGRUCell
+@@CudnnCompatibleLSTMCell
 @@CudnnGRU
 @@CudnnLSTM
 @@CudnnRNNRelu
@@ -28,6 +30,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnCompatibleGRUCell
+from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnCompatibleLSTMCell
 from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnGRU
 from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnGRUSaveable
 from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnLSTM
@@ -36,9 +40,12 @@ from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnRNNRelu
 from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnRNNReluSaveable
 from tensorflow.contrib.cudnn_rnn.python.ops.cudnn_rnn_ops import CudnnRNNTanhSaveable
 
+
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
+    "CudnnCompatibleGRUCell",
+    "CudnnCompatibleLSTMCell",
     "CudnnGRU",
     "CudnnLSTM",
     "CudnnRNNRelu",
diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
index 7794c371e1..694bd507d9 100644
--- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
+++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
@@ -63,9 +63,10 @@ class CudnnCompatibleLSTMCell(lstm_ops.LSTMBlockCell):
   this cell seamlessly.
   """
 
-  def __init__(self, num_units):
+  def __init__(self, num_units, reuse=None):
     super(CudnnCompatibleLSTMCell, self).__init__(
-        num_units, forget_bias=0, clip_cell=False, use_peephole=False)
+        num_units, forget_bias=0, clip_cell=False, use_peephole=False,
+        reuse=reuse)
     self._names.update({"scope": "cudnn_compatible_lstm_cell"})
 
 
diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
index 48c2c5a724..f591f7c84e 100644
--- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
@@ -342,7 +342,8 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
                num_units,
                forget_bias=1.0,
                clip_cell=True,
-               use_peephole=False):
+               use_peephole=False,
+               reuse=None):
     """Initialize the basic LSTM cell.
 
     Args:
@@ -351,10 +352,14 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell):
       clip_cell: boolean, whether to apply cell clipping. See
         `_lstm_block_cell()` for details.
       use_peephole: Whether to use peephole connections or not.
+      reuse: (optional) boolean describing whether to reuse variables in an
+        existing scope.  If not `True`, and the existing scope already has the
+        given variables, an error is raised.
 
       When restoring from CudnnLSTM-trained checkpoints, must use
       CudnnCompatibleLSTMBlockCell instead.
     """
+    super(LSTMBlockCell, self).__init__(_reuse=reuse)
     self._num_units = num_units
     self._forget_bias = forget_bias
     self._use_peephole = use_peephole
-- 
GitLab


From 797ca0d482457185f35d46cbce4c430f55b8b66a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 12:25:56 -0700
Subject: [PATCH 108/294] [XLA] Teach HloComputation::Reparent to properly
 handle reparenting into fusion computations.

This also moves HloInstruction::CheckFusionInstruction() out of "private", and adds calls to it in the reduce-precision-insertion test to confirm that the reduce-precision-insertion pass maintains valid fusion computations.  (These checks then fail without the fix to HloComputation::Reparent.)

PiperOrigin-RevId: 167031741
---
 tensorflow/compiler/xla/service/hlo_computation.cc          | 4 ++++
 tensorflow/compiler/xla/service/hlo_instruction.h           | 6 +++---
 .../compiler/xla/service/reduce_precision_insertion_test.cc | 4 ++++
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index b8133cda30..c030ceb72f 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -166,6 +166,10 @@ Status HloComputation::RemoveParameter(int64 param_no) {
 
 void HloComputation::Reparent(HloInstruction* instruction) {
   instruction->set_parent(this);
+  if (is_fusion_computation_ && instruction != root_instruction_) {
+    CHECK(root_instruction_->fusion_instruction() != nullptr);
+    instruction->SetParentFusion(root_instruction_->fusion_instruction());
+  }
 }
 
 bool HloComputation::IsRemovable(const HloInstruction* instruction) {
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index a97066a785..923aeb47f0 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -906,6 +906,9 @@ class HloInstruction {
     parent_fusion_instruction_ = fusion_instruction;
   }
 
+  // CHECKs various invariants of a fusion instruction.
+  void CheckFusionInstruction() const;
+
   // Get/Set the number of partitions per outer dimension (in order, starting
   // with outer-most dimension first). Currently used by the parallel cpu
   // backend to partition HLOs into parallel tasks.
@@ -976,9 +979,6 @@ class HloInstruction {
       const Shape& shape,
       tensorflow::gtl::ArraySlice operands);
 
-  // CHECKs various invariants of a fusion instruction.
-  void CheckFusionInstruction() const;
-
   // Returns true if this instruction can legally have the dimensions field
   // set. Used for checking precondition of dimensions field accessors.
   bool CanHaveDimensionsField() const;
diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc
index 064020896e..607abee33d 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc
@@ -425,6 +425,7 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInHeadOfFusionNode) {
   EXPECT_EQ(computation->root_instruction(), z);
   HloInstruction* y_fused = z->fused_expression_root();
   EXPECT_EQ(y_fused->opcode(), HloOpcode::kCos);
+  z->CheckFusionInstruction();
 
   // This should see that the fusion computation contains a kCos operation,
   // and insert a new reduce-precision node at its input.
@@ -449,6 +450,7 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInHeadOfFusionNode) {
   EXPECT_EQ(computation->root_instruction(), z);
   EXPECT_THAT(z->fused_expression_root(), y_fused);
   EXPECT_THAT(y_fused->operand(0), op::ReducePrecision(op::Parameter()));
+  z->CheckFusionInstruction();
 }
 
 TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInTailOfFusionNode) {
@@ -466,6 +468,7 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInTailOfFusionNode) {
       shape, HloInstruction::FusionKind::kLoop, y));
   EXPECT_IS_OK(computation->ReplaceUsesOfInstruction(y, z));
   EXPECT_IS_OK(computation->RemoveInstruction(y));
+  z->CheckFusionInstruction();
 
   // Confirm expected graph before adding reduce-precision ops.
   EXPECT_THAT(x->users(), UnorderedElementsAre(z));
@@ -495,6 +498,7 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInTailOfFusionNode) {
   EXPECT_THAT(x->users(), UnorderedElementsAre(z));
   EXPECT_EQ(computation->root_instruction(), z);
   EXPECT_THAT(z->fused_expression_root(), op::ReducePrecision(y_fused));
+  z->CheckFusionInstruction();
 }
 
 TEST_F(ReducePrecisionInsertionTest, MakeFilterFunctionNoSubstrings) {
-- 
GitLab


From 03d310cc61a864600d24977f73138e643659986c Mon Sep 17 00:00:00 2001
From: Asim Shankar 
Date: Wed, 30 Aug 2017 12:31:45 -0700
Subject: [PATCH 109/294] Java: Fix 8-byte leak when constructing graph
 operations.

Thanks to @riklopfer for reporting in #11948

PiperOrigin-RevId: 167032430
---
 .../java/src/main/native/operation_builder_jni.cc  | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tensorflow/java/src/main/native/operation_builder_jni.cc b/tensorflow/java/src/main/native/operation_builder_jni.cc
index 37f01a943a..e03be7b110 100644
--- a/tensorflow/java/src/main/native/operation_builder_jni.cc
+++ b/tensorflow/java/src/main/native/operation_builder_jni.cc
@@ -75,8 +75,10 @@ JNIEXPORT jlong JNICALL Java_org_tensorflow_OperationBuilder_finish(
   TF_Status* status = TF_NewStatus();
   TF_Operation* op = TF_FinishOperation(d, status);
   if (throwExceptionIfNotOK(env, status)) {
+    TF_DeleteStatus(status);
     return reinterpret_cast(op);
   }
+  TF_DeleteStatus(status);
   return 0;
 }
 
@@ -211,6 +213,7 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrTensor(
   TF_Status* status = TF_NewStatus();
   TF_SetAttrTensor(d, cname, t, status);
   throwExceptionIfNotOK(env, status);
+  TF_DeleteStatus(status);
   env->ReleaseStringUTFChars(name, cname);
 }
 
@@ -234,6 +237,7 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrTensorList(
   TF_Status* status = TF_NewStatus();
   TF_SetAttrTensorList(d, cname, tensors.get(), n, status);
   throwExceptionIfNotOK(env, status);
+  TF_DeleteStatus(status);
   env->ReleaseStringUTFChars(name, cname);
 }
 
@@ -259,7 +263,8 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrShape(
 }
 
 JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrStringList(
-        JNIEnv* env, jclass object, jlong handle, jstring name, jobjectArray values) {
+    JNIEnv* env, jclass object, jlong handle, jstring name,
+    jobjectArray values) {
   TF_OperationDescription* d = requireHandle(env, handle);
   if (d == nullptr) return;
   const char* cname = env->GetStringUTFChars(name, nullptr);
@@ -267,12 +272,13 @@ JNIEXPORT void JNICALL Java_org_tensorflow_OperationBuilder_setAttrStringList(
   static_assert(sizeof(jbyte) == 1,
                 "Require Java byte to be represented as a single byte");
   std::unique_ptr jarrays(new jbyteArray[num_values]);
-  std::unique_ptr jvalues(new jbyte*[num_values]);
-  std::unique_ptr cvalues(new void*[num_values]);
+  std::unique_ptr jvalues(new jbyte*[num_values]);
+  std::unique_ptr cvalues(new void*[num_values]);
   std::unique_ptr lengths(new size_t[num_values]);
 
   for (int i = 0; i < num_values; ++i) {
-    jbyteArray v = static_cast(env->GetObjectArrayElement(values, i));
+    jbyteArray v =
+        static_cast(env->GetObjectArrayElement(values, i));
     jarrays[i] = v;
     jvalues[i] = env->GetByteArrayElements(v, nullptr);
     cvalues[i] = jvalues[i];
-- 
GitLab


From 79603e4dd92b182ca0689da97d4d992d1d2b1316 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 12:39:32 -0700
Subject: [PATCH 110/294] Minor change in description.

PiperOrigin-RevId: 167033586
---
 tensorflow/contrib/data/python/ops/dataset_ops.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py
index 5b77129a48..46af2b1949 100644
--- a/tensorflow/contrib/data/python/ops/dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/dataset_ops.py
@@ -2134,7 +2134,8 @@ class FlatMapDataset(Dataset):
 
 
 class InterleaveDataset(Dataset):
-  """A `Dataset` that maps a function over its input and flattens the result."""
+  """A `Dataset` that maps a function over its input and interleaves the result.
+  """
 
   def __init__(self, input_dataset, map_func, cycle_length, block_length):
     """See `Dataset.interleave()` for details."""
-- 
GitLab


From e565d1f1fced69789feb10f1ea1241157ec95f93 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 13:19:39 -0700
Subject: [PATCH 111/294] [XLA] Refactor parent-fusion-instruction pointer into
 HloComputation, not HloInstruction.

Presently, each instruction inside a fusion computation contains a pointer to the fusion instruction that contains the computation, which is redundant since this is common across the entire computation.  This leads to lots of places where this pointer must be set when adding an instruction to the fusion computation (and bugs such as b/65177535 when one is missed), as well as code to check that it's set correctly.  In addition, this is simply unnecessary data bloat.

Moreover, the computation itself does not contain a pointer to the fusion instruction that references it, which leads to odd circumlocutions in the HloComputation code that retrieve the fusion instruction from the computation's root instruction.

Thus, this CL moves this pointer into the HloComputation class (replacing the is_fusion_computation_ bool value), and refactor the uses as necessary.

PiperOrigin-RevId: 167039280
---
 .../xla/service/elemental_ir_emitter.cc       |  2 +-
 .../xla/service/gpu/ir_emitter_unnested.cc    |  9 +--
 .../xla/service/gpu/while_transformer.cc      |  5 +-
 .../compiler/xla/service/hlo_computation.cc   | 25 +++-----
 .../compiler/xla/service/hlo_computation.h    | 21 ++++---
 .../compiler/xla/service/hlo_graph_dumper.cc  |  6 +-
 .../compiler/xla/service/hlo_instruction.cc   | 57 +++++--------------
 .../compiler/xla/service/hlo_instruction.h    | 27 ++-------
 .../xla/service/hlo_tfgraph_builder.cc        |  7 ++-
 9 files changed, 56 insertions(+), 103 deletions(-)

diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
index 84bdd5acac..b02138325e 100644
--- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc
@@ -616,7 +616,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeRngElementGenerator(
 
   auto random_value = [hlo]() {
     const HloModule* module =
-        hlo->IsFused() ? hlo->fusion_instruction()->parent()->parent()
+        hlo->IsFused() ? hlo->parent()->FusionInstruction()->parent()->parent()
                        : hlo->parent()->parent();
     return module->RandomNew64();
   };
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index e7d32a4ae1..749badf3f2 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -894,7 +894,7 @@ Status IrEmitterUnnested::EmitColumnReduction(
     llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.after_block,
                                    &ir_builder_);
     const HloInstruction* output =
-        reduce->IsFused() ? reduce->fusion_instruction() : reduce;
+        reduce->IsFused() ? reduce->parent()->FusionInstruction() : reduce;
     llvm::Value* output_address = GetIrArray(*output).EmitArrayElementAddress(
         llvm_ir::IrArray::Index(x, output->shape(), &ir_builder_), &ir_builder_,
         "output_element_address");
@@ -1142,7 +1142,7 @@ Status IrEmitterUnnested::EmitRowReduction(
     }
 
     const HloInstruction* output =
-        reduce->IsFused() ? reduce->fusion_instruction() : reduce;
+        reduce->IsFused() ? reduce->parent()->FusionInstruction() : reduce;
 
     // Emit an atomic operation that accumulates the partial reduction result of
     // lane 0 (which holds the partially accumulated result for its warp) to the
@@ -1913,10 +1913,7 @@ Status IrEmitterUnnested::EmitTargetElementLoopInThunk(
     tuple_operand_ptrs.push_back(output_arrays[i].GetBasePointer());
   }
   ir_builder_.SetInsertPoint(ir_builder_.GetInsertBlock()->getTerminator());
-  //  const HloInstruction* root = hlo.fused_expression_root();
-  llvm_ir::EmitTuple(
-      GetIrArray(*hlo.fused_expression_root()->fusion_instruction()),
-      tuple_operand_ptrs, &ir_builder_);
+  llvm_ir::EmitTuple(GetIrArray(hlo), tuple_operand_ptrs, &ir_builder_);
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer.cc b/tensorflow/compiler/xla/service/gpu/while_transformer.cc
index cecbb01ff8..ccdd171759 100644
--- a/tensorflow/compiler/xla/service/gpu/while_transformer.cc
+++ b/tensorflow/compiler/xla/service/gpu/while_transformer.cc
@@ -308,7 +308,7 @@ class WhileConditionComputationMatcher : public MatcherBase {
         GetTaggedInstruction("gte.fusion_param.param0", tagged_instructions));
     CHECK_EQ(HloOpcode::kParameter, gte_fusion_param0->opcode());
     CHECK(gte_fusion_param0->IsFused());
-    if (gte_fusion_param0->fusion_instruction()->operand(
+    if (gte_fusion_param0->parent()->FusionInstruction()->operand(
             gte_fusion_param0->parameter_number()) !=
         computation_->parameter_instruction(0)) {
       return InvalidArgument("Could not match fusion param: %s",
@@ -469,7 +469,8 @@ class WhileBodyComputationMatcher : public MatcherBase {
         // Fusion parameter: lookup and compare with associated fusion operand.
         CHECK_EQ(HloOpcode::kParameter, inst->opcode());
         CHECK(inst->IsFused());
-        if (inst->fusion_instruction()->operand(inst->parameter_number()) !=
+        if (inst->parent()->FusionInstruction()->operand(
+                inst->parameter_number()) !=
             computation_->parameter_instruction(0)) {
           return InvalidArgument("Could not match fusion param: %s",
                                  inst->name().c_str());
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index c030ceb72f..2d07784619 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -58,16 +58,16 @@ std::unique_ptr HloComputation::Builder::Build(
   CHECK_NE(nullptr, root);
 
   return WrapUnique(new HloComputation(name_, parameter_count, &instructions_,
-                                       root, is_fusion_computation_));
+                                       root, fusion_instruction_));
 }
 
 HloComputation::HloComputation(
     const string& name, int parameter_count,
     std::vector>* instructions,
-    HloInstruction* root_instruction, bool is_fusion_computation)
+    HloInstruction* root_instruction, HloInstruction* fusion_instruction)
     : name_(name),
       root_instruction_(root_instruction),
-      is_fusion_computation_(is_fusion_computation) {
+      fusion_instruction_(fusion_instruction) {
   param_instructions_.resize(parameter_count, nullptr);
   bool root_found = false;
   for (auto& instruction : *instructions) {
@@ -112,11 +112,8 @@ HloInstruction* HloComputation::AddInstructionInternal(
 HloInstruction* HloComputation::AddParameter(
     std::unique_ptr instruction) {
   CHECK(instruction->opcode() == HloOpcode::kParameter);
-  CHECK(is_fusion_computation_);
-  CHECK(root_instruction_->fusion_instruction() != nullptr);
-  instruction->SetParentFusion(root_instruction_->fusion_instruction());
-  CHECK(root_instruction_->fusion_instruction()->operand_count() ==
-        param_instructions_.size());
+  CHECK(IsFusionComputation());
+  CHECK(fusion_instruction_->operand_count() == param_instructions_.size());
   instruction->set_parent(this);
   param_instructions_.push_back(instruction.get());
   AddInstructionInternal(std::move(instruction));
@@ -126,8 +123,7 @@ HloInstruction* HloComputation::AddParameter(
 Status HloComputation::RemoveParameter(int64 param_no) {
   CHECK_GE(param_no, 0);
   CHECK_LT(param_no, param_instructions_.size());
-  CHECK(is_fusion_computation_);
-  CHECK(root_instruction_->fusion_instruction() != nullptr);
+  CHECK(IsFusionComputation());
   HloInstruction* param_instruction = param_instructions_[param_no];
   auto param_instruction_iterator = param_instructions_.begin() + param_no;
   param_instructions_.erase(param_instruction_iterator);
@@ -155,7 +151,6 @@ Status HloComputation::RemoveParameter(int64 param_no) {
         AddInstructionInternal(HloInstruction::CreateParameter(
             param_no, param_instruction->shape(), param_name));
     TF_RETURN_IF_ERROR(param_instruction->ReplaceAllUsesWith(new_instr));
-    new_instr->SetParentFusion(root_instruction_->fusion_instruction());
     param_instructions_[param_no] = new_instr;
     TF_RETURN_IF_ERROR(RemoveInstruction(param_instruction));
     param_no++;
@@ -166,10 +161,6 @@ Status HloComputation::RemoveParameter(int64 param_no) {
 
 void HloComputation::Reparent(HloInstruction* instruction) {
   instruction->set_parent(this);
-  if (is_fusion_computation_ && instruction != root_instruction_) {
-    CHECK(root_instruction_->fusion_instruction() != nullptr);
-    instruction->SetParentFusion(root_instruction_->fusion_instruction());
-  }
 }
 
 bool HloComputation::IsRemovable(const HloInstruction* instruction) {
@@ -182,7 +173,7 @@ bool HloComputation::IsRemovable(const HloInstruction* instruction) {
   }
 
   if (instruction->opcode() == HloOpcode::kParameter &&
-      !is_fusion_computation_) {
+      !IsFusionComputation()) {
     return false;
   }
 
@@ -267,7 +258,7 @@ void HloComputation::set_root_instruction(
     HloInstruction* new_root_instruction) {
   // The shape of the root (ignoring layout) is an invariant of the computation
   // for non-fusion cases.
-  if (!is_fusion_computation_) {
+  if (!IsFusionComputation()) {
     CHECK(ShapeUtil::Compatible(new_root_instruction->shape(),
                                 root_instruction_->shape()))
         << new_root_instruction->shape().ShortDebugString()
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index f383a17fb8..576c44a9f3 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -56,10 +56,11 @@ class HloComputation {
   // Builder class for HloComputation.
   class Builder {
    public:
-    explicit Builder(const string& name, bool is_fusion_computation = false)
+    explicit Builder(const string& name,
+                     HloInstruction* fusion_instruction = nullptr)
         : name_(name),
           last_added_instruction_(nullptr),
-          is_fusion_computation_(is_fusion_computation) {}
+          fusion_instruction_(fusion_instruction) {}
 
     // Build and return an HloComputation. The parameter root_instruction
     // specifies the already-added instruction to use as the root. If
@@ -78,7 +79,7 @@ class HloComputation {
    private:
     const string name_;
     HloInstruction* last_added_instruction_;
-    bool is_fusion_computation_;
+    HloInstruction* fusion_instruction_;
     std::vector> instructions_;
   };
 
@@ -274,13 +275,18 @@ class HloComputation {
   bool HasSideEffect() const;
 
   // Returns if this computation is a fusion computation.
-  bool IsFusionComputation() const { return is_fusion_computation_; }
+  bool IsFusionComputation() const { return fusion_instruction_ != nullptr; }
+
+  // Returns the owning fusion instruction, or nullptr if this is not a fusion
+  // computation.
+  HloInstruction* FusionInstruction() const { return fusion_instruction_; }
 
  private:
   explicit HloComputation(
       const string& name, int parameter_count,
       std::vector>* instructions,
-      HloInstruction* root_instruction, bool is_fusion_computation = false);
+      HloInstruction* root_instruction,
+      HloInstruction* fusion_instruction = nullptr);
 
   // Internal helper for adding instructions.
   HloInstruction* AddInstructionInternal(
@@ -309,8 +315,9 @@ class HloComputation {
   string name_;
   HloInstruction* root_instruction_;
 
-  // A tag shows if this is a fusion computation.
-  bool is_fusion_computation_;
+  // If this computation is a fusion computation, this field points to the
+  // corresponding fusion instruction.  Otherwise, this is null.
+  HloInstruction* fusion_instruction_;
 
   // Module containing this computation.
   HloModule* parent_ = nullptr;
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 24a47f80af..dfb111d1d0 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -479,7 +479,7 @@ stylesheet="
     // If this edge crosses a fusion cluster boundary, highlight it when the
     // cluster is hovered over.
     if (from_node->IsFused() &&
-        from_node->fusion_instruction()->fused_expression_root() == from_node) {
+        from_node->parent()->root_instruction() == from_node) {
       int64 cluster_id = cluster_ids_.at(from_node->parent());
       add_hover_css_rule("clust", cluster_id, kBlue);
     }
@@ -657,7 +657,7 @@ string HloDotDumper::GetInstructionNodeInlinedConstants(
   // Special case: If instr is a parameter to a fusion node, check whether the
   // corresponding operand to the fusion node is a constant.
   if (instr->opcode() == HloOpcode::kParameter && instr->IsFused()) {
-    const HloInstruction* fusion = instr->fusion_instruction();
+    const HloInstruction* fusion = instr->parent()->FusionInstruction();
     const HloInstruction* operand = fusion->operand(instr->parameter_number());
     if (operand->opcode() != HloOpcode::kConstant) {
       return "";
@@ -898,7 +898,7 @@ void HloDotDumper::AddInstructionIncomingEdges(const HloInstruction* instr) {
   // expressions are handled specially -- we draw an edge from the corresponding
   // operand on the fusion node itself to the parameter.
   if (instr->opcode() == HloOpcode::kParameter && instr->IsFused()) {
-    const HloInstruction* fusion = instr->fusion_instruction();
+    const HloInstruction* fusion = instr->parent()->FusionInstruction();
     add_edge(fusion->operand(instr->parameter_number()), instr,
              /*operand_num=*/0);
   } else {
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 237e745383..3bdb67ba92 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -649,16 +649,14 @@ HloInstruction* HloInstruction::CloneAndFuseInternal(
   if (called_computations_.empty()) {
     // New fusion instruction. It should not be a multioutput instruction.
     CHECK(!add_output);
-    auto builder = HloComputation::Builder("fused_computation", true);
+    auto builder = HloComputation::Builder("fused_computation", this);
     builder.AddInstruction(instruction_to_fuse->Clone(/*suffix=*/""));
     called_computations_.push_back(
         CHECK_NOTNULL(GetModule())->AddEmbeddedComputation(builder.Build()));
     clone = fused_expression_root();
-    clone->parent_fusion_instruction_ = this;
   } else {
     clone = fused_instructions_computation()->AddInstruction(
         instruction_to_fuse->Clone(/*suffix=*/""));
-    clone->parent_fusion_instruction_ = this;
     // When add_output is false, instruction_to_fuse is necessarily an operand
     // of the fusion instruction. After fusion this will no longer be the case.
     // Remove the operand from the operand list and remove its corresponding
@@ -727,12 +725,8 @@ HloInstruction* HloInstruction::CloneAndFuseInternal(
       // to avoid a double %%.
       string param_name =
           StrCat(operand->name().substr(1), ".param_", param_no);
-      std::unique_ptr param_instruction =
-          CreateParameter(param_no, operand->shape(), param_name);
-
-      param_instruction->parent_fusion_instruction_ = this;
       fused_param = fused_instructions_computation()->AddParameter(
-          std::move(param_instruction));
+          CreateParameter(param_no, operand->shape(), param_name));
       AppendOperand(operand);
     }
     TF_CHECK_OK(clone->ReplaceOperandWith(operand_num, fused_param));
@@ -762,7 +756,6 @@ HloInstruction* HloInstruction::CloneAndFuseInternal(
         HloInstruction::CreateTuple(tuple_elements));
     fused_instructions_computation()->set_root_instruction(new_root);
     shape_ = new_root->shape();
-    new_root->parent_fusion_instruction_ = this;
     if (fused_root->opcode() == HloOpcode::kTuple) {
       TF_CHECK_OK(
           fused_instructions_computation()->RemoveInstruction(fused_root));
@@ -839,24 +832,17 @@ bool HloInstruction::HasSideEffect() const {
 void HloInstruction::CheckFusionInstruction() const {
   CHECK_EQ(opcode_, HloOpcode::kFusion);
 
-  const std::list>& fused_instructions_ =
-      fused_instructions_computation()->instructions();
-  // All instructions owned by this fusion instruction must be fused, and the
-  // parent fusion instruction of the fused instructions must be 'this'.
-  for (auto& instruction : fused_instructions_) {
-    CHECK(instruction->IsFused());
-    CHECK_EQ(this, instruction->fusion_instruction());
-    CHECK_EQ(fused_instructions_computation(), instruction->parent())
-        << instruction->ToString();
-  }
+  // The parent fusion instruction of the fusion computation must be 'this'.
+  HloComputation* fused_computation = fused_instructions_computation();
+  CHECK_EQ(this, fused_computation->FusionInstruction());
 
   // Fused root instruction and fused parameters must all be owned by the fusion
-  // instruction.
+  // computation.
   bool root_owned = false;
   const std::vector& fused_parameters_ = fused_parameters();
   const HloInstruction* fused_root_ = fused_expression_root();
   std::vector parameter_owned(fused_parameters_.size(), false);
-  for (auto& instruction : fused_instructions_) {
+  for (auto& instruction : fused_computation->instructions()) {
     if (fused_root_ == instruction.get()) {
       CHECK(!root_owned);
       root_owned = true;
@@ -877,14 +863,13 @@ void HloInstruction::CheckFusionInstruction() const {
   // Fused root must have no users.
   CHECK_EQ(0, fused_root_->user_count());
 
-  // All uses of fused instructions must be in the fusion instruction, and every
+  // All uses of fused instructions must be in the fusion computation, and every
   // non-root instruction must have at least one use.
-  for (auto& instruction : fused_instructions_) {
+  for (auto& instruction : fused_instructions_computation()->instructions()) {
     if (instruction.get() != fused_root_) {
       CHECK_GT(instruction->user_count(), 0);
       for (auto& user : instruction->users()) {
-        CHECK(user->IsFused());
-        CHECK_EQ(this, user->fusion_instruction());
+        CHECK_EQ(fused_computation, user->parent());
       }
     }
   }
@@ -1173,15 +1158,10 @@ std::unique_ptr HloInstruction::CloneFusionWithNewOperands(
   std::list> new_fused_instructions;
   // Create the list of fused parameters by mapping through the cloned,
   // fused instructions.
-  std::vector new_fused_parameters;
-  const std::vector& fused_parameters_ =
-      fused_instructions_computation()->parameter_instructions();
-
-  for (HloInstruction* old_fused_parameter : fused_parameters_) {
+  for (HloInstruction* old_fused_parameter :
+       fused_instructions_computation()->parameter_instructions()) {
     new_fused_instructions.push_back(old_fused_parameter->Clone());
     HloInstruction* new_fusion_parameter = new_fused_instructions.back().get();
-    new_fusion_parameter->parent_fusion_instruction_ = new_instruction.get();
-    new_fused_parameters.push_back(new_fusion_parameter);
     InsertOrDie(&old_to_new, old_fused_parameter, new_fusion_parameter);
   }
   for (auto old_fused_instruction :
@@ -1202,12 +1182,12 @@ std::unique_ptr HloInstruction::CloneFusionWithNewOperands(
             old_fused_instruction->shape(), new_operands));
     HloInstruction* new_fused_instruction = new_fused_instructions.back().get();
     new_fused_instruction->set_parent(parent());
-    new_fused_instruction->parent_fusion_instruction_ = new_instruction.get();
     InsertOrDie(&old_to_new, old_fused_instruction, new_fused_instruction);
   }
   new_instruction->fusion_kind_ = fusion_kind_;
   auto computation_builder = HloComputation::Builder(
-      fused_instructions_computation()->name() + ".clone", true);
+      fused_instructions_computation()->name() + ".clone",
+      new_instruction.get());
   // We iterated the fusion instructions in reverse post order which means
   // that we must reverse our new list of fusion instructions.
   for (auto new_fused_instruction_iter = new_fused_instructions.rbegin();
@@ -1912,9 +1892,7 @@ string HloInstruction::TracingTag() const {
   return literal_->u8s_string();
 }
 
-bool HloInstruction::IsFused() const {
-  return parent_fusion_instruction_ != nullptr;
-}
+bool HloInstruction::IsFused() const { return parent_->IsFusionComputation(); }
 
 bool HloInstruction::IsFusable() const {
   // Instructions which are traced should not be fused.
@@ -1949,11 +1927,6 @@ HloComputation* HloInstruction::fused_instructions_computation() const {
   return fused_instructions_computation;
 }
 
-HloInstruction* HloInstruction::fusion_instruction() const {
-  CHECK(IsFused());
-  return parent_fusion_instruction_;
-}
-
 HloInstruction* HloInstruction::fused_expression_root() const {
   CHECK_EQ(opcode_, HloOpcode::kFusion);
   return fused_instructions_computation()->root_instruction();
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 923aeb47f0..5688fcc425 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -603,26 +603,21 @@ class HloInstruction {
   // instruction.
   bool IsFused() const;
 
+  // Returns the computation for this fused instruction.
+  //
+  // Precondition: opcode() == HloOpcode::kFusion
+  HloComputation* fused_instructions_computation() const;
+
   // Returns true if this instruction can be legally fused into a fusion
   // instruction.
   bool IsFusable() const;
 
-  // Returns the fusion instruction that contains this instruction.
-  //
-  // Note: only valid if this instruction is fused into a fusion instruction.
-  HloInstruction* fusion_instruction() const;
-
   // Returns the root instruction of the fused expression contained within this
   // fusion instruction.
   //
   // Precondition: opcode() == HloOpcode::kFusion
   HloInstruction* fused_expression_root() const;
 
-  // Returns the computation for this fused instruction.
-  //
-  // Precondition: opcode() == HloOpcode::kFusion
-  HloComputation* fused_instructions_computation() const;
-
   // Returns the list of fused instructions inside this fusioninstruction.
   //
   // Note: although the list itself is const, the instructions contained in the
@@ -898,14 +893,6 @@ class HloInstruction {
   // instruction to make it a bitcast.
   bool CouldBeBitcast() const;
 
-  // Sets the parent fusion instruction for this instruction.
-  //
-  // Precondition: opcode() == HloOpcode::kFusion
-  void SetParentFusion(HloInstruction* fusion_instruction) {
-    CHECK_EQ(HloOpcode::kFusion, fusion_instruction->opcode());
-    parent_fusion_instruction_ = fusion_instruction;
-  }
-
   // CHECKs various invariants of a fusion instruction.
   void CheckFusionInstruction() const;
 
@@ -1049,10 +1036,6 @@ class HloInstruction {
   // padding of this pad instruction. Only set for pad instructions.
   std::unique_ptr padding_config_;
 
-  // If this instruction is fused into a fusion instruction, this field points
-  // to the fusion instruction.
-  HloInstruction* parent_fusion_instruction_ = nullptr;
-
   // The type of the fusion. Used by kFusion only.
   FusionKind fusion_kind_;
 
diff --git a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
index 76177462aa..5a4c93b59a 100644
--- a/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
+++ b/tensorflow/compiler/xla/service/hlo_tfgraph_builder.cc
@@ -91,10 +91,11 @@ const string& HloTfGraphBuilder::GetNodeNameForInstruction(
   string node_name;
   // If an instruction is fused, put it in the subgraph of the fusion;
   // otherwise, put it in the computation subgraph.
-  if (instruction->IsFused()) {
-    node_name = GetNodeNameForInstruction(instruction->fusion_instruction());
+  const HloComputation* computation = instruction->parent();
+  if (computation->IsFusionComputation()) {
+    node_name = GetNodeNameForInstruction(computation->FusionInstruction());
   } else {
-    node_name = instruction->parent()->name();
+    node_name = computation->name();
     if (!instruction->metadata().op_name().empty()) {
       // Always make computations contain TF ops but not the other way around.
       StrAppend(&node_name, "/", instruction->metadata().op_name());
-- 
GitLab


From 1bc5cff9dba8baff9c1476c1708adfeb7898d41a Mon Sep 17 00:00:00 2001
From: Tayo Oguntebi 
Date: Wed, 30 Aug 2017 13:25:56 -0700
Subject: [PATCH 112/294] [XLA] Generalize pooling to support N-dimensional
 inputs.

Extends the usage of windowed pipeline emitter in pooling to support arbitrary
input dimensions > 3. Users should note that memory usage compounds quickly
with very-high dimensional tensors.

Adds an HLO evaluator test for the R6 pooling case.

PiperOrigin-RevId: 167040226
---
 .../compiler/xla/service/hlo_evaluator.cc     |  4 +-
 .../xla/service/hlo_evaluator_test.cc         | 64 +++++++++++++++++++
 tensorflow/compiler/xla/tests/BUILD           |  1 +
 .../compiler/xla/tests/reduce_window_test.cc  | 63 ++++++++++++++++++
 4 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 31cb64d879..e09c9d3beb 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -912,9 +912,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     TF_RET_CHECK(
         ShapeUtil::Compatible(reduce_window->shape(), inferred_return_shape))
         << "return shape is set to: "
-        << ShapeUtil::HumanString(reduce_window->shape())
+        << ShapeUtil::HumanStringWithLayout(reduce_window->shape())
         << "but is inferred to be: "
-        << ShapeUtil::HumanString(inferred_return_shape);
+        << ShapeUtil::HumanStringWithLayout(inferred_return_shape);
 
     const Literal& operand_literal =
         parent_->GetEvaluatedLiteralFor(reduce_window->operand(0));
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index 9d33236df5..a826548349 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -1207,6 +1207,70 @@ TEST_F(HloEvaluatorTest, ReduceWindowAdd) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
+TEST_F(HloEvaluatorTest, ReduceWindowAdd6D) {
+  HloComputation::Builder b(TestName());
+
+  // arg: f32[4,4,4,4,4,4] full of ones. Using small dims to limit run-time.
+  std::vector input_dims(6, 4);
+  std::unique_ptr arg_literal =
+      Literal::CreateFullWithMonotonicDim0MajorLayout(input_dims, 1.0f);
+
+  HloInstruction* arg_instruction =
+      b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal)));
+
+  auto init_value = b.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(0.f)));
+
+  HloComputation::Builder add_computation("add");
+  Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  auto param_lhs = add_computation.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "lhs"));
+  auto param_rhs = add_computation.AddInstruction(
+      HloInstruction::CreateParameter(1, scalar_shape, "rhs"));
+  add_computation.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape, HloOpcode::kAdd, param_lhs, param_rhs));
+  HloModule module(TestName());
+  auto add_func = module.AddEmbeddedComputation(add_computation.Build());
+
+  Window window;
+
+  WindowDimension trivial_dim;
+  trivial_dim.set_size(1);
+  trivial_dim.set_stride(1);
+  trivial_dim.set_padding_low(0);
+  trivial_dim.set_padding_high(0);
+  trivial_dim.set_window_dilation(1);
+  trivial_dim.set_base_dilation(1);
+
+  WindowDimension active_dim;
+  active_dim.set_size(2);
+  active_dim.set_stride(1);
+  active_dim.set_padding_low(0);
+  active_dim.set_padding_high(0);
+  active_dim.set_window_dilation(1);
+  active_dim.set_base_dilation(1);
+
+  *window.add_dimensions() = trivial_dim;
+  *window.add_dimensions() = active_dim;
+  *window.add_dimensions() = active_dim;
+  *window.add_dimensions() = active_dim;
+  *window.add_dimensions() = trivial_dim;
+  *window.add_dimensions() = trivial_dim;
+
+  Shape shape = ShapeUtil::MakeShape(F32, {4, 3, 3, 3, 4, 4});
+  b.AddInstruction(HloInstruction::CreateReduceWindow(
+      shape, arg_instruction, init_value, window, add_func));
+
+  auto computation = module.AddEntryComputation(b.Build());
+  std::unique_ptr result =
+      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+
+  std::vector output_dims = {4, 3, 3, 3, 4, 4};
+  std::unique_ptr result_literal =
+      Literal::CreateFullWithMonotonicDim0MajorLayout(output_dims, 8.0f);
+  LiteralTestUtil::ExpectEqual(*result_literal, *result);
+}
+
 TEST_F(HloEvaluatorTest, StridedSlice) {
   HloComputation::Builder b(TestName());
 
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 0a2d337752..52b2027aec 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -899,6 +899,7 @@ xla_test_library(
         "//tensorflow/compiler/xla/client:padding",
         "//tensorflow/compiler/xla/client/lib:arithmetic",
         "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
         "//tensorflow/compiler/xla/tests:literal_test_util",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:lib",
diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc
index 60d6d19ce1..6ef5c4a8c8 100644
--- a/tensorflow/compiler/xla/tests/reduce_window_test.cc
+++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/reference_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
 #include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
@@ -319,6 +320,68 @@ TEST_F(ReduceWindowTest, R4UnitWindow) {
                              ErrorSpec(1e-3, 1e-3));
 }
 
+XLA_TEST_F(HloTestBase, R6Add) {
+  auto b = HloComputation::Builder(TestName());
+
+  std::vector input_dims(6, 8);
+  std::unique_ptr arg_literal =
+      Literal::CreateFullWithMonotonicDim0MajorLayout(input_dims, 1.0f);
+  auto input =
+      b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal)));
+
+  auto init_value = b.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0(0.f)));
+
+  HloComputation::Builder add_computation("add");
+  Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+  auto param_lhs = add_computation.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, "lhs"));
+  auto param_rhs = add_computation.AddInstruction(
+      HloInstruction::CreateParameter(1, scalar_shape, "rhs"));
+  add_computation.AddInstruction(HloInstruction::CreateBinary(
+      scalar_shape, HloOpcode::kAdd, param_lhs, param_rhs));
+
+  auto module = CreateNewModule();
+  auto add_func = module->AddEmbeddedComputation(add_computation.Build());
+
+  WindowDimension trivial_dim;
+  trivial_dim.set_size(1);
+  trivial_dim.set_stride(1);
+  trivial_dim.set_padding_low(0);
+  trivial_dim.set_padding_high(0);
+  trivial_dim.set_window_dilation(1);
+  trivial_dim.set_base_dilation(1);
+
+  WindowDimension active_dim;
+  active_dim.set_size(3);
+  active_dim.set_stride(1);
+  active_dim.set_padding_low(0);
+  active_dim.set_padding_high(0);
+  active_dim.set_window_dilation(1);
+  active_dim.set_base_dilation(1);
+
+  Window window;
+  *window.add_dimensions() = trivial_dim;
+  *window.add_dimensions() = trivial_dim;
+  *window.add_dimensions() = active_dim;
+  *window.add_dimensions() = active_dim;
+  *window.add_dimensions() = trivial_dim;
+  *window.add_dimensions() = trivial_dim;
+
+  Shape shape = ShapeUtil::MakeShape(F32, {8, 8, 6, 6, 8, 8});
+  b.AddInstruction(HloInstruction::CreateReduceWindow(shape, input, init_value,
+                                                      window, add_func));
+
+  std::vector output_dims = {8, 8, 6, 6, 8, 8};
+  std::unique_ptr expected =
+      Literal::CreateFullWithMonotonicDim0MajorLayout(output_dims, 9.0f);
+
+  module->AddEntryComputation(b.Build());
+  auto actual = ExecuteAndTransfer(std::move(module), {});
+
+  LiteralTestUtil::ExpectNear(*actual, *expected, ErrorSpec(1e-3, 1e-3));
+}
+
 XLA_TEST_F(ReduceWindowTest, R4SecondMinorStride) {
   Array4D input_array(2, 1, 27, 119);
   input_array.FillRandom(2.0f);
-- 
GitLab


From f22716be4c386c66c38699c76ab99e457dc369f8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 13:32:07 -0700
Subject: [PATCH 113/294] Fix linter error in linalg_grad.py that broke
 opensource build.

PiperOrigin-RevId: 167041045
---
 tensorflow/python/ops/linalg_grad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py
index b7ae5665f1..b5e4e0e0af 100644
--- a/tensorflow/python/ops/linalg_grad.py
+++ b/tensorflow/python/ops/linalg_grad.py
@@ -143,7 +143,7 @@ def _MatrixSolveLsGrad(op, grad):
     l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
     # pylint: disable=protected-access
     chol = linalg_ops._RegularizedGramianCholesky(
-        a, l2_regularizer=l2_regularizer, fist_kind=False)
+        a, l2_regularizer=l2_regularizer, first_kind=False)
     # pylint: enable=protected-access
     grad_b = linalg_ops.cholesky_solve(chol, math_ops.matmul(a, grad))
     # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
-- 
GitLab


From 8f25fa964751c56a50a6c9c9d420927b98fadf65 Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Wed, 30 Aug 2017 13:41:13 -0700
Subject: [PATCH 114/294] Fixes eager higher-order gradients for ops whose
 gradient function uses their outputs.

PiperOrigin-RevId: 167042517
---
 tensorflow/python/eager/backprop.py        |  8 ++++----
 tensorflow/python/eager/backprop_test.py   | 10 ++++++++++
 tensorflow/python/eager/custom_gradient.py | 10 +++++++++-
 tensorflow/python/eager/function.py        | 12 +++++++++---
 tensorflow/python/eager/tape.py            |  5 +++--
 5 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index b0e8a41cb5..ca3ad1a2c3 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -146,17 +146,17 @@ def _record_gradient(op_name, inputs, attrs, results, name):
   # It is imperative we make a copy of results here as otherwise we create a
   # dependency cycle in the captured function and this can delay garbage
   # collecting of the tensors arbitrarily.
-  result_copies = results[:]
+  results_size = len(results) if isinstance(results, (list, tuple)) else 1
 
-  def grad_fn(*outputs):
+  def grad_fn(*orig_outputs):
     """Generated gradient function."""
-    tensors = inputs + result_copies + list(outputs)
+    tensors = inputs + list(orig_outputs)
     tensors = container_types.make_sequence(tape.EagerList, *tensors)
     result = _magic_gradient_function(op_name, attrs, len(inputs),
                                       num_outputs, *(tensors))
     if _tracing:
       print("Gradient for", (name if name else op_name), "inputs", inputs,
-            "output_grads", outputs)
+            "output_grads", orig_outputs[results_size:], "gradients", result)
     return result
 
   results = tape.record_operation(results, inputs, [], grad_fn)
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 8a13f9c068..010124ed56 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -174,6 +174,16 @@ class BackpropTest(test.TestCase):
 
     self.assertAllEqual(gradgrad(tensor.Tensor(3.0))[0].numpy(), 2.0)
 
+  def testGradGradExp(self):
+
+    def grad(x):
+      value = backprop.gradients_function(math_ops.exp, [0])(x)[0]
+      return value
+
+    gradgrad = backprop.gradients_function(grad, [0])
+
+    self.assertAllEqual(gradgrad(tensor.Tensor(0.0))[0].numpy(), 1.0)
+
   def testGPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
diff --git a/tensorflow/python/eager/custom_gradient.py b/tensorflow/python/eager/custom_gradient.py
index 39fd845efc..0c921bb023 100644
--- a/tensorflow/python/eager/custom_gradient.py
+++ b/tensorflow/python/eager/custom_gradient.py
@@ -56,6 +56,14 @@ def custom_gradient(f):
                      if isinstance(x, (_tensor.Tensor, tf_ops.Tensor))
                      or ag_core.isnode(x)]
     result, grad_fn = f(*args, **kwargs)
+    result_size = len(result) if isinstance(result, (list, tuple)) else 1
+
+    # TODO(apassos): naive uses of custom_gradient will not get the correct
+    # second derivative this way if they capture any output tensors. Change the
+    # signature of custom_gradient.
+    def actual_grad_fn(*outputs):
+      outputs = outputs[result_size:]
+      return grad_fn(*outputs)
 
     flat_result = nest.flatten(result)
     flat_result = [ag_core.getval(x) for x in flat_result]
@@ -63,7 +71,7 @@ def custom_gradient(f):
         flat_result,
         input_tensors,
         [],
-        grad_fn)
+        actual_grad_fn)
     flat_result = list(flat_result)
     return nest.pack_sequence_as(structure=result, flat_sequence=flat_result)
 
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 60fd4957da..980b6c883f 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -276,9 +276,15 @@ class _GraphModeFunction(object):
           break
       else:  # Note: for-else here done on purpose
         watched_extra_inputs.append(t)
-    real_outputs = tape.record_operation(real_outputs,
-                                         (args + watched_extra_inputs),
-                                         side_outputs, self._backward_function)
+
+    def backward_function_wrapper(*outputs):
+      outputs = outputs[len(real_outputs):]
+      return self._backward_function(*outputs)
+    real_outputs = tape.record_operation(
+        real_outputs,
+        (args + watched_extra_inputs),
+        side_outputs,
+        backward_function_wrapper)
 
     return self._build_call_outputs(self._returns, real_outputs)
 
diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py
index 6fd19aee45..4d09db73c9 100644
--- a/tensorflow/python/eager/tape.py
+++ b/tensorflow/python/eager/tape.py
@@ -232,9 +232,10 @@ def record_operation(o, i, s, b):
 def _record_operation_vjp(g, ans, vs, gvs, output_tensors, input_tensors,
                           side_outputs, backward_function):
   """Gradient for _record_operation."""
-  del ans, vs, gvs, output_tensors, input_tensors
+  del vs, gvs, input_tensors, output_tensors
   backward_args = tuple(g) + tuple(side_outputs)
-  backward_args = container_types.make_sequence(EagerList, *backward_args)
+  backward_args = container_types.make_sequence(
+      EagerList, *(tuple(ans) + backward_args))
   tensors = nest.flatten(backward_function(*backward_args))
   return container_types.make_sequence(EagerList, *tensors)
 
-- 
GitLab


From ac2c94fb30c8e8b44ca40b01082016813eea18d8 Mon Sep 17 00:00:00 2001
From: Yao Zhang 
Date: Wed, 30 Aug 2017 13:59:20 -0700
Subject: [PATCH 115/294] Handle duplicated inputs in topological sort. And do
 not add the redundant control dependencies. The would result in malfunction
 of topological sort as it previously doesn't handle duplicated inputs. For
 example, say node A has three repeated input ^B, node A will never get added
 to queue in topological sort, because the number of ready inputs will always
 be less than the number of inputs (B is only counted once). node {   name:
 "A"     op: "SomeOp"     input:"^B"     input:"^B"     input:"^B" }

PiperOrigin-RevId: 167045325
---
 .../grappler/optimizers/constant_folding.cc   |  8 +++-
 .../optimizers/constant_folding_test.cc       | 37 +++++++++++++++++++
 tensorflow/core/grappler/utils.cc             | 35 ++++++++++++++++++
 tensorflow/core/grappler/utils.h              | 16 ++++++++
 .../core/grappler/utils/topological_sort.cc   | 11 ++++--
 .../grappler/utils/topological_sort_test.cc   | 12 ++++++
 6 files changed, 113 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 35b0b7c163..443c0b72ab 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -568,12 +568,16 @@ Status ConstantFolding::FoldNode(NodeDef* node) {
 
     // Forward control dependencies.
     for (const auto& input : node->input()) {
-      if (IsControlInput(input)) {
+      if (IsControlInput(input) &&
+          std::find(const_node->input().begin(), const_node->input().end(),
+                    input) == const_node->input().end()) {
         *const_node->add_input() = input;
       } else {
         NodeDef* input_node = node_map_->GetNode(input);
         for (const auto& fanin_of_input : input_node->input()) {
-          if (IsControlInput(fanin_of_input)) {
+          if (IsControlInput(fanin_of_input) &&
+              std::find(const_node->input().begin(), const_node->input().end(),
+                        fanin_of_input) == const_node->input().end()) {
             *const_node->add_input() = fanin_of_input;
           }
         }
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 2db843f2a4..0f7e7f1d49 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -235,6 +235,43 @@ TEST_F(ConstantFoldingTest, ControlDependenciesEmptyFetch) {
   EXPECT_EQ(2, found);
 }
 
+TEST_F(ConstantFoldingTest, ControlDependenciesDeduplicate) {
+  tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
+  Output dflt = ops::Const(scope.WithOpName("dflt"), 3.14f, {1});
+  Output p1 = ops::PlaceholderWithDefault(scope.WithOpName("p1"), dflt, {1});
+  Output p2 = ops::PlaceholderWithDefault(scope.WithOpName("p2"), dflt, {1});
+  Output c =
+      ops::Const(scope.WithOpName("c").WithControlDependencies(p1), 10, {3});
+  Output i1 = ops::Identity(scope.WithOpName("i1")
+                                .WithControlDependencies(p2)
+                                .WithControlDependencies(p1),
+                            {c});
+  Output i2 = ops::Identity(scope.WithOpName("i2"), {i1});
+
+  GrapplerItem item;
+  item.fetch.push_back("i2");
+  TF_CHECK_OK(scope.ToGraphDef(&item.graph));
+
+  ConstantFolding fold;
+  GraphDef output;
+  Status status = fold.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  std::vector expected_nodes = {"dflt", "p1", "p2", "i1", "i2"};
+  EXPECT_EQ(output.node_size(), expected_nodes.size());
+  int i = 0;
+  for (const auto& node : output.node()) {
+    EXPECT_EQ(expected_nodes[i], output.node(i).name());
+    i++;
+    if (node.name() == "i1") {
+      EXPECT_EQ("Const", node.op());
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ("^p1", node.input(0));
+      EXPECT_EQ("^p2", node.input(1));
+    }
+  }
+}
+
 TEST_F(ConstantFoldingTest, VariableNumberOfOutputs) {
   tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
   // Add a DynamicPartition node to the graph
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 9e15744fab..c8830e9b3c 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -95,6 +95,41 @@ void NodeMap::UpdateOutput(const string& node_name,
   outputs.insert(nodes_[new_output_name]);
 }
 
+OutputMap::OutputMap(GraphDef* graph) : graph_(graph) {
+  for (int i = 0; i < graph_->node_size(); i++) {
+    auto node = graph_->mutable_node(i);
+    auto rslt = nodes_.insert(std::make_pair(node->name(), node));
+    // Check that the graph doesn't contain multiple nodes with the same name.
+    CHECK(rslt.second);
+    for (const auto& input : node->input()) {
+      string input_node = NodeName(input);
+      if (outputs_[input_node].count(node) == 0) {
+        outputs_[input_node].insert(std::make_pair(node, 1));
+      } else {
+        outputs_[input_node][node]++;
+      }
+    }
+  }
+}
+
+NodeDef* OutputMap::GetNode(const string& name) const {
+  string node_name = NodeName(name);
+  auto it = nodes_.find(node_name);
+  if (it == nodes_.end()) {
+    return nullptr;
+  }
+  return it->second;
+}
+
+const std::unordered_map& OutputMap::GetOutputs(
+    const string& node_name) const {
+  auto it = outputs_.find(node_name);
+  if (it == outputs_.end()) {
+    return empty_map_;
+  }
+  return it->second;
+}
+
 bool IsSameInput(const string& name1, const string& name2) {
   if (name1 == name2) {
     return true;
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index a9eccd685b..03f49c0ca2 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -52,6 +52,22 @@ class NodeMap {
   std::unordered_map> outputs_;
 };
 
+// A utility class to lookup a node's outputs and the number of times it
+// presents in each output.
+class OutputMap {
+ public:
+  explicit OutputMap(GraphDef* graph);
+  NodeDef* GetNode(const string& name) const;
+  const std::unordered_map& GetOutputs(
+      const string& node_name) const;
+
+ private:
+  GraphDef* graph_;
+  std::unordered_map empty_map_;
+  std::unordered_map nodes_;
+  std::unordered_map> outputs_;
+};
+
 // True iff 'name' refers to a control inputs, i.e. a node name prefixed with
 // the ^ character.
 bool IsControlInput(const string& name);
diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc
index a5a7f34db0..77d4702d21 100644
--- a/tensorflow/core/grappler/utils/topological_sort.cc
+++ b/tensorflow/core/grappler/utils/topological_sort.cc
@@ -26,7 +26,7 @@ namespace grappler {
 // Kahn's algorithm is implemented.
 // For details, see https://en.wikipedia.org/wiki/Topological_sorting
 void TopologicalSort(GraphDef* graph) {
-  NodeMap node_map(graph);
+  OutputMap output_map(graph);
   std::vector ready_nodes;
   ready_nodes.reserve(graph->node_size());
   int front = 0;
@@ -41,7 +41,7 @@ void TopologicalSort(GraphDef* graph) {
     if (IsMerge(*node)) {
       ready_inputs[node] = 0;
       for (const auto& input : node->input()) {
-        if (IsNextIteration(*node_map.GetNode(input))) {
+        if (IsNextIteration(*output_map.GetNode(input))) {
           ready_inputs[node]++;
         }
       }
@@ -52,8 +52,9 @@ void TopologicalSort(GraphDef* graph) {
 
   while (front != back) {
     auto ready_node = ready_nodes[front];
-    for (const auto& fanout : node_map.GetOutputs(ready_node->name())) {
-      ready_inputs[fanout]++;
+    for (const auto& fanout_pair : output_map.GetOutputs(ready_node->name())) {
+      auto fanout = fanout_pair.first;
+      ready_inputs[fanout] += fanout_pair.second;
       if (ready_inputs[fanout] == fanout->input_size()) {
         ready_nodes.push_back(fanout);
         back++;
@@ -70,6 +71,8 @@ void TopologicalSort(GraphDef* graph) {
       new_node->Swap(ready_nodes[i]);
     }
     graph->mutable_node()->Swap(new_graph.mutable_node());
+  } else {
+    LOG(ERROR) << "The graph couldn't be sorted in topological order.";
   }
 }
 
diff --git a/tensorflow/core/grappler/utils/topological_sort_test.cc b/tensorflow/core/grappler/utils/topological_sort_test.cc
index 55f66b2734..dc99cb1052 100644
--- a/tensorflow/core/grappler/utils/topological_sort_test.cc
+++ b/tensorflow/core/grappler/utils/topological_sort_test.cc
@@ -89,6 +89,18 @@ TEST_F(TopologicalSortTest, WithIllegalLoop) {
   }
 }
 
+TEST_F(TopologicalSortTest, DuplicatedInputs) {
+  GraphDef graph;
+  *graph.add_node() = CreateNode("2", {"1", "1"});
+  *graph.add_node() = CreateNode("1", {});
+
+  TopologicalSort(&graph);
+  std::vector order = {"1", "2"};
+  for (int i = 0; i < order.size(); i++) {
+    EXPECT_EQ(graph.node(i).name(), order[i]);
+  }
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 79e6be9c99e1ed5a6e47cb7a2dd5fb40b98248e5 Mon Sep 17 00:00:00 2001
From: Shivam Kotwalia 
Date: Thu, 31 Aug 2017 02:41:05 +0530
Subject: [PATCH 116/294] Update README.md (#12633)

Edit the tutorial URL
---
 tensorflow/examples/speech_commands/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/examples/speech_commands/README.md b/tensorflow/examples/speech_commands/README.md
index 3b78210129..63be04ee58 100644
--- a/tensorflow/examples/speech_commands/README.md
+++ b/tensorflow/examples/speech_commands/README.md
@@ -1,4 +1,4 @@
 # Speech Commands Example
 
 This is a basic speech recognition example. For more information, see the
-tutorial at http://tensorflow.org/tutorials/audio_recognition.
+tutorial at https://www.tensorflow.org/versions/master/tutorials/audio_recognition.
-- 
GitLab


From db4afe53df7d6d37724a5a6d17dd33650dde99c2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 14:24:39 -0700
Subject: [PATCH 117/294] Make convolutional, pooling and normalization layers
 to work with EAGER.

PiperOrigin-RevId: 167049392
---
 tensorflow/python/layers/convolutional.py | 59 ++++++++++++-----------
 tensorflow/python/layers/normalization.py | 26 +++++++---
 2 files changed, 49 insertions(+), 36 deletions(-)

diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py
index 68293aa5fe..e46c0b2954 100644
--- a/tensorflow/python/layers/convolutional.py
+++ b/tensorflow/python/layers/convolutional.py
@@ -24,6 +24,7 @@ import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -1293,18 +1294,19 @@ class Conv2DTranspose(Conv2D):
         padding=self.padding.upper(),
         data_format=utils.convert_data_format(self.data_format, ndim=4))
 
-    # Infer the static output shape:
-    out_shape = inputs.get_shape().as_list()
-    out_shape[c_axis] = self.filters
-    out_shape[h_axis] = utils.deconv_output_length(out_shape[h_axis],
-                                                   kernel_h,
-                                                   self.padding,
-                                                   stride_h)
-    out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis],
-                                                   kernel_w,
-                                                   self.padding,
-                                                   stride_w)
-    outputs.set_shape(out_shape)
+    if context.in_graph_mode():
+      # Infer the static output shape:
+      out_shape = inputs.get_shape().as_list()
+      out_shape[c_axis] = self.filters
+      out_shape[h_axis] = utils.deconv_output_length(out_shape[h_axis],
+                                                     kernel_h,
+                                                     self.padding,
+                                                     stride_h)
+      out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis],
+                                                     kernel_w,
+                                                     self.padding,
+                                                     stride_w)
+      outputs.set_shape(out_shape)
 
     if self.bias:
       outputs = nn.bias_add(
@@ -1591,22 +1593,23 @@ class Conv3DTranspose(Conv3D):
         data_format=utils.convert_data_format(self.data_format, ndim=5),
         padding=self.padding.upper())
 
-    # Infer the static output shape:
-    out_shape = inputs.get_shape().as_list()
-    out_shape[c_axis] = self.filters
-    out_shape[d_axis] = utils.deconv_output_length(out_shape[d_axis],
-                                                   kernel_d,
-                                                   self.padding,
-                                                   stride_d)
-    out_shape[h_axis] = utils.deconv_output_length(out_shape[h_axis],
-                                                   kernel_h,
-                                                   self.padding,
-                                                   stride_h)
-    out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis],
-                                                   kernel_w,
-                                                   self.padding,
-                                                   stride_w)
-    outputs.set_shape(out_shape)
+    if context.in_graph_mode():
+      # Infer the static output shape:
+      out_shape = inputs.get_shape().as_list()
+      out_shape[c_axis] = self.filters
+      out_shape[d_axis] = utils.deconv_output_length(out_shape[d_axis],
+                                                     kernel_d,
+                                                     self.padding,
+                                                     stride_d)
+      out_shape[h_axis] = utils.deconv_output_length(out_shape[h_axis],
+                                                     kernel_h,
+                                                     self.padding,
+                                                     stride_h)
+      out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis],
+                                                     kernel_w,
+                                                     self.padding,
+                                                     stride_w)
+      outputs.set_shape(out_shape)
 
     if self.bias:
       outputs_shape = outputs.shape.as_list()
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 62f5881f16..1fc2d70f9c 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -25,6 +25,7 @@ import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import ops
@@ -242,15 +243,20 @@ class BatchNormalization(base.Layer):
                                   initializer=init_ops.zeros_initializer(),
                                   trainable=False)
           return var
+
         with ops.device(None):
-          with ops.device(lambda _: self.moving_mean.device):
+          device = ((lambda _: self.moving_mean.device)
+                    if context.in_graph_mode() else self.moving_mean.device)
+          with ops.device(device):
             self.renorm_mean = _renorm_variable('renorm_mean', (param_dim,))
             self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ())
           # We initialize renorm_stddev to 0, and maintain the (0-initialized)
           # renorm_stddev_weight. This allows us to (1) mix the average
           # stddev with the minibatch stddev early in training, and (2) compute
           # the unbiased average stddev by dividing renorm_stddev by the weight.
-          with ops.device(lambda _: self.moving_variance.device):
+          device = ((lambda _: self.moving_variance.device)
+                    if context.in_graph_mode() else self.moving_variance.device)
+          with ops.device(device):
             self.renorm_stddev = _renorm_variable('renorm_stddev', (param_dim,))
             self.renorm_stddev_weight = _renorm_variable(
                 'renorm_stddev_weight', ())
@@ -301,8 +307,12 @@ class BatchNormalization(base.Layer):
           self.moving_mean, mean, decay, zero_debias=False)
       variance_update = moving_averages.assign_moving_average(
           self.moving_variance, variance, decay, zero_debias=False)
-      self.add_update(mean_update, inputs=inputs)
-      self.add_update(variance_update, inputs=inputs)
+      if context.in_graph_mode():
+        # Note that in Eager mode, the updates are already executed when running
+        # assign_moving_averages. So we do not need to put them into
+        # collections.
+        self.add_update(mean_update, inputs=inputs)
+        self.add_update(variance_update, inputs=inputs)
 
     return output
 
@@ -335,6 +345,7 @@ class BatchNormalization(base.Layer):
     r = _smart_select(training, lambda: r, lambda: array_ops.ones_like(r))
     d = _smart_select(training, lambda: d, lambda: array_ops.zeros_like(d))
     decay = _smart_select(training, lambda: self.renorm_momentum, lambda: 1.)
+
     def _update_renorm_variable(var, weight, value):
       """Updates a moving average and weight, returns the unbiased value."""
       # Update the variables without zero debiasing. The debiasing will be
@@ -418,9 +429,9 @@ class BatchNormalization(base.Layer):
           self.moving_mean, new_mean, decay, zero_debias=False)
       variance_update = moving_averages.assign_moving_average(
           self.moving_variance, new_variance, decay, zero_debias=False)
-
-      self.add_update(mean_update, inputs=inputs)
-      self.add_update(variance_update, inputs=inputs)
+      if context.in_graph_mode():
+        self.add_update(mean_update, inputs=inputs)
+        self.add_update(variance_update, inputs=inputs)
 
     else:
       mean, variance = self.moving_mean, self.moving_variance
@@ -566,7 +577,6 @@ def batch_normalization(inputs,
 BatchNorm = BatchNormalization
 batch_norm = batch_normalization
 
-
 # Helper function
 
 
-- 
GitLab


From 7453a0fe7e31af6763748458aed21750bc1b4000 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 14:29:05 -0700
Subject: [PATCH 118/294] Fix conversion of string constants with nulls to
 EagerTensors. Fix testing on GPUs in constant_op_eager_test.

PiperOrigin-RevId: 167050082
---
 tensorflow/python/framework/ops.py            |  9 ++++
 tensorflow/python/framework/test_util.py      |  2 +-
 .../kernel_tests/constant_op_eager_test.py    | 50 ++++++++++++++-----
 3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 5a0c323ce4..da3757190c 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -604,6 +604,13 @@ def _maybe_modify_numpy_dtype_determination(np_array):
   return np_array
 
 
+def _has_string(value):
+  if isinstance(value, compat.bytes_or_text_types): return True
+  if isinstance(value, collections.Sequence) and value:
+    return _has_string(value[0])
+  return False
+
+
 # TODO(agarwal): rename to TensorHandle.
 class EagerTensor(Tensor):
   """A TensorFlow Eager Tensor."""
@@ -625,6 +632,8 @@ class EagerTensor(Tensor):
     # https://www.tensorflow.org/code/tensorflow/python/framework/constant_op.py
     self._id = uid()
     if not isinstance(value, np.ndarray):
+      if dtype is None and _has_string(value):
+        dtype = dtypes.string
       npt = None if dtype is None else dtype.as_numpy_dtype
       try:
         value = np.array(value, dtype=npt)
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index c65816a543..73b7f821c8 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -65,7 +65,7 @@ def gpu_device_name():
   """Returns the name of a GPU device if available or the empty string."""
   for x in device_lib.list_local_devices():
     if x.device_type == "GPU" or x.device_type == "SYCL":
-      return x.name
+      return compat.as_str(x.name)
   return ""
 
 
diff --git a/tensorflow/python/kernel_tests/constant_op_eager_test.py b/tensorflow/python/kernel_tests/constant_op_eager_test.py
index 0e98afbe6e..0b4fa60d81 100644
--- a/tensorflow/python/kernel_tests/constant_op_eager_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_eager_test.py
@@ -26,27 +26,33 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_lib
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.util import compat
 
 
-# TODO(josh11b): add tests with string types, lists/tuples, Shape.
+# TODO(josh11b): add tests with lists/tuples, Shape.
 class ConstantTest(test.TestCase):
 
   def _testCpu(self, x):
     np_ans = np.array(x)
-    tf_ans = ops.convert_to_tensor(x).numpy()
+    with context.device("/device:CPU:0"):
+      tf_ans = ops.convert_to_tensor(x).numpy()
     if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]:
       self.assertAllClose(np_ans, tf_ans)
     else:
       self.assertAllEqual(np_ans, tf_ans)
 
   def _testGpu(self, x):
-    np_ans = np.array(x)
-    tf_ans = ops.convert_to_tensor(x).numpy()
-    if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]:
-      self.assertAllClose(np_ans, tf_ans)
-    else:
-      self.assertAllEqual(np_ans, tf_ans)
+    device = test_util.gpu_device_name()
+    if device:
+      np_ans = np.array(x)
+      with context.device(device):
+        tf_ans = ops.convert_to_tensor(x).numpy()
+      if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]:
+        self.assertAllClose(np_ans, tf_ans)
+      else:
+        self.assertAllEqual(np_ans, tf_ans)
 
   def _testAll(self, x):
     self._testCpu(x)
@@ -78,11 +84,11 @@ class ConstantTest(test.TestCase):
 
   def testComplex64(self):
     self._testAll(
-        np.complex(1, 2) * np.arange(-15, 15).reshape([2, 3, 5
-                                                      ]).astype(np.complex64))
+        np.complex(1, 2) *
+        np.arange(-15, 15).reshape([2, 3, 5]).astype(np.complex64))
     self._testAll(
-        np.complex(1, 2) * np.random.normal(size=30).reshape(
-            [2, 3, 5]).astype(np.complex64))
+        np.complex(1, 2) *
+        np.random.normal(size=30).reshape([2, 3, 5]).astype(np.complex64))
     self._testAll(np.empty((2, 0, 5)).astype(np.complex64))
 
   def testComplex128(self):
@@ -94,6 +100,26 @@ class ConstantTest(test.TestCase):
             [2, 3, 5]).astype(np.complex128))
     self._testAll(np.empty((2, 0, 5)).astype(np.complex128))
 
+  def testString(self):
+    val = [compat.as_bytes(str(x)) for x in np.arange(-15, 15)]
+    self._testCpu(np.array(val).reshape([2, 3, 5]))
+    self._testCpu(np.empty((2, 0, 5)).astype(np.str_))
+
+  def testStringWithNulls(self):
+    val = ops.convert_to_tensor(b"\0\0\0\0").numpy()
+    self.assertEqual(len(val), 4)
+    self.assertEqual(val, b"\0\0\0\0")
+
+    val = ops.convert_to_tensor(b"xx\0xx").numpy()
+    self.assertEqual(len(val), 5)
+    self.assertAllEqual(val, b"xx\0xx")
+
+    nested = [[b"\0\0\0\0", b"xx\0xx"], [b"\0_\0_\0_\0", b"\0"]]
+    val = ops.convert_to_tensor(nested).numpy()
+    # NOTE(mrry): Do not use assertAllEqual, because it converts nested to a
+    #   numpy array, which loses the null terminators.
+    self.assertEqual(val.tolist(), nested)
+
   def testExplicitShapeNumPy(self):
     c = constant_op.constant(
         np.arange(-15, 15).reshape([2, 3, 5]).astype(np.float32),
-- 
GitLab


From bd64cddd4773708fc95cfab33d16e7785967d1e3 Mon Sep 17 00:00:00 2001
From: Xiaoqiang Zheng 
Date: Wed, 30 Aug 2017 14:37:32 -0700
Subject: [PATCH 119/294] Fix bias_add size calculation for half.

PiperOrigin-RevId: 167051419
---
 tensorflow/core/kernels/bias_op_gpu.cu.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc
index e07ca5e0c4..ddc2d457b0 100644
--- a/tensorflow/core/kernels/bias_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc
@@ -142,9 +142,9 @@ __global__ void BiasGradNCHW_SharedAtomics(const T* output_backprop,
                                            int group_size) {
   // Initialize the shared memory.
   typedef typename AccumulatorType::type AccT;
-  __shared__ AccT s_data[32];
-  int32 s_data_size = sizeof(s_data) / sizeof(T);
-  for (int32 index = threadIdx.x; index < s_data_size; index += blockDim.x) {
+  const int32 kSDataSize = 32;
+  __shared__ AccT s_data[kSDataSize];
+  for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) {
     s_data[index] = AccT(0);
   }
   __syncthreads();
-- 
GitLab


From 2ce226d4b66bd4b77fede24793e438c4b42c0633 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 14:44:08 -0700
Subject: [PATCH 120/294] Only add reduce-precision ops inside
 FusionKind::kLoop fusion nodes.

Other sorts of fusion nodes can't necessarily handle arbitrary elementwise additions into the computation.

PiperOrigin-RevId: 167052456
---
 .../compiler/xla/service/reduce_precision_insertion.cc      | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc
index 01dbb7e866..33327dc60f 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc
@@ -122,7 +122,8 @@ StatusOr ReducePrecisionInsertion::insert_on_inputs(
         continue;
       }
 
-      if (instruction->opcode() == HloOpcode::kFusion) {
+      if (instruction->opcode() == HloOpcode::kFusion &&
+          instruction->fusion_kind() == HloInstruction::FusionKind::kLoop) {
         // Insert the reduce-precision operation inside the fusion computation,
         // after the corresponding parameter instruction.
         TF_ASSIGN_OR_RETURN(
@@ -171,7 +172,8 @@ StatusOr ReducePrecisionInsertion::insert_on_outputs(
       continue;
     }
 
-    if (instruction->opcode() == HloOpcode::kFusion) {
+    if (instruction->opcode() == HloOpcode::kFusion &&
+        instruction->fusion_kind() == HloInstruction::FusionKind::kLoop) {
       // Insert the reduce-precision operation as the last operation inside
       // the fusion computation.
       HloInstruction* fusion_root = instruction->fused_expression_root();
-- 
GitLab


From 394214dc5ca7437fa07532be44b6f99193313a27 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 14:44:44 -0700
Subject: [PATCH 121/294] Set the GPU execution context inside the callback
 wrapper in CudaSolver::CopyLapackInfoToHostAsync, in case the caller launches
 additional kernels in the provided callback.

PiperOrigin-RevId: 167052565
---
 tensorflow/core/kernels/cuda_solvers.cc | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/kernels/cuda_solvers.cc
index 3a8ccfe6b7..5c6b5eec82 100644
--- a/tensorflow/core/kernels/cuda_solvers.cc
+++ b/tensorflow/core/kernels/cuda_solvers.cc
@@ -30,10 +30,13 @@
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/platform/cuda.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
 
+using ::perftools::gputools::cuda::ScopedActivateExecutorContext;
+
 namespace tensorflow {
 namespace {
 
@@ -148,7 +151,12 @@ Status CudaSolver::CopyLapackInfoToHostAsync(
   // This callback checks that all batch items in all calls were processed
   // successfully and passes status to the info_checker_callback accordingly.
   auto wrapped_info_checker_callback =
-      [info_checker_callback](std::vector host_lapack_infos) {
+      [](OpKernelContext* context,
+         std::function&)>
+             info_checker_callback,
+         std::vector host_lapack_infos) {
+        auto stream = context->op_device_context()->stream();
+        ScopedActivateExecutorContext scoped_activation{stream->parent()};
         Status status;
         for (const auto& host_lapack_info : host_lapack_infos) {
           for (int i = 0; i < host_lapack_info.size() && status.ok(); ++i) {
@@ -166,8 +174,10 @@ Status CudaSolver::CopyLapackInfoToHostAsync(
         }
         info_checker_callback(status, host_lapack_infos);
       };
+
   auto cb =
-      std::bind(wrapped_info_checker_callback, std::move(host_lapack_infos));
+      std::bind(wrapped_info_checker_callback, context_,
+                std::move(info_checker_callback), std::move(host_lapack_infos));
   auto stream = context_->op_device_context()->stream();
   context_->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute(
       stream, std::move(cb));
-- 
GitLab


From f63078d75c06d401edd52f4afd3f9efaf667f72f Mon Sep 17 00:00:00 2001
From: Jianwei Xie 
Date: Wed, 30 Aug 2017 14:52:52 -0700
Subject: [PATCH 122/294] Fixed a bug that the job was calculated based on
 wrong master for evaluation.

PiperOrigin-RevId: 167053816
---
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 25 +++++++++++--------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 7c883ec926..485c884bb3 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -102,10 +102,12 @@ def _increase_eval_step_op(iterations_per_loop):
                               use_locking=True)
 
 
-def _tpu_job(run_config):
+def _tpu_job(run_config, mode):
   # The tpu job is determined by the run_config. Right now, this method is
   # required as tpu_config is not part of the RunConfig.
-  return None if run_config.master in ['', 'local'] else 'tpu_worker'
+  master = (run_config.evaluation_master if mode == model_fn_lib.ModeKeys.EVAL
+            else run_config.master)
+  return None if master in ['', 'local'] else 'tpu_worker'
 
 
 def _is_running_on_cpu(use_tpu, mode, eval_batch_size):
@@ -265,9 +267,9 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
      dequeue.
   """
 
-  def __init__(self, run_config, enqueue_fn, dequeue_ops=None):
+  def __init__(self, run_config, mode, enqueue_fn, dequeue_ops=None):
     self._iterations = run_config.tpu_config.iterations_per_loop
-    self._tpu_job = _tpu_job(run_config)
+    self._tpu_job = _tpu_job(run_config, mode)
     self._enqueue_fn = enqueue_fn
     self._dequeue_ops = dequeue_ops
 
@@ -899,7 +901,7 @@ class _EvalMetrics(object):
     """
 
     num_shards = run_config.tpu_config.num_shards
-    job = _tpu_job(run_config)
+    job = _tpu_job(run_config, model_fn_lib.ModeKeys.EVAL)
     job_device = '' if job is None else ('/job:%s' % job)
 
     # For each i, dequeue_ops[i] is a list containing the tensors from all
@@ -1162,7 +1164,7 @@ class TPUEstimator(estimator_lib.Estimator):
       with ops.device('/device:CPU:0'):
         return input_fn(**kwargs)
 
-    job = _tpu_job(config)
+    job = _tpu_job(config, mode)
     def placement_function(index):
       if job is None:
         return '/replica:0/task:0/device:CPU:0'
@@ -1190,13 +1192,14 @@ class TPUEstimator(estimator_lib.Estimator):
 
 # TODO(b/64607814): Ensure batch_axis works with nested structures.
 def _create_infeed_enqueue_ops_and_dequeue_fn(inputs_holder, run_config,
-                                              batch_axis):
+                                              batch_axis, mode):
   """Utility to convert input_fn to enqueue and dequeue fns for TPU.
 
   Args:
     inputs_holder: An `_InputsHolder` holding features and labels.
     run_config: A `RunConfig` instance.
     batch_axis: A python list of batch dimensions.
+    mode: ModeKeys
 
   Returns:
     A tuple of (dequeue_fn, enqueue_fn)
@@ -1239,7 +1242,7 @@ def _create_infeed_enqueue_ops_and_dequeue_fn(inputs_holder, run_config,
       return infeed_queue.generate_enqueue_ops(
           sharded_inputs, tpu_ordinal_function=tpu_ordinal_function)
     else:
-      job = _tpu_job(run_config)
+      job = _tpu_job(run_config, mode)
       def placement_function(index):
         if job is None:
           return '/replica:0/task:0/device:CPU:0'
@@ -1271,12 +1274,12 @@ def _augment_model_fn(model_fn, train_batch_size, eval_batch_size, use_tpu,
                            num_shards=config.tpu_config.num_shards)
 
     dequeue_fn, enqueue_fn = _create_infeed_enqueue_ops_and_dequeue_fn(
-        inputs, config, batch_axis)
+        inputs, config, batch_axis, mode)
 
     if mode == model_fn_lib.ModeKeys.TRAIN:
       loss = _train_on_tpu_system(model_fn_wrapper, dequeue_fn)
       hooks = [
-          TPUInfeedOutfeedSessionHook(config, enqueue_fn),
+          TPUInfeedOutfeedSessionHook(config, mode, enqueue_fn),
           training.LoggingTensorHook(
               {'loss': array_ops.identity(loss),
                'step': training.get_global_step()},
@@ -1318,7 +1321,7 @@ def _augment_model_fn(model_fn, train_batch_size, eval_batch_size, use_tpu,
         eval_metric_ops.to_metric_metric_ops_for_tpu(
             config, dummy_update_op))
     hooks = [
-        TPUInfeedOutfeedSessionHook(config, enqueue_fn, eval_update_ops),
+        TPUInfeedOutfeedSessionHook(config, mode, enqueue_fn, eval_update_ops),
     ]
 
     return model_fn_lib.EstimatorSpec(
-- 
GitLab


From af20b42c2c926e998a80a6e725d282f7fab79992 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 14:59:32 -0700
Subject: [PATCH 123/294] Add make_best_model_export_strategy which exports the
 best model according to eval results.

PiperOrigin-RevId: 167054780
---
 .../learn/utils/saved_model_export_utils.py   | 197 +++++++++++---
 .../utils/saved_model_export_utils_test.py    | 241 ++++++++++++------
 2 files changed, 315 insertions(+), 123 deletions(-)

diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
index 1e68a3ef66..676e1f2b51 100644
--- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
+++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Utilities supporting export to SavedModel.
 
 Some contents of this file are moved to tensorflow/python/estimator/export.py:
@@ -39,6 +38,7 @@ import time
 from tensorflow.contrib.layers.python.layers import feature_column
 from tensorflow.contrib.learn.python.learn import export_strategy
 from tensorflow.contrib.learn.python.learn.estimators import constants
+from tensorflow.contrib.learn.python.learn.estimators import metric_key
 from tensorflow.contrib.learn.python.learn.estimators import prediction_key
 from tensorflow.contrib.learn.python.learn.utils import gc
 from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
@@ -75,8 +75,8 @@ FEATURES_INPUT_ALTERNATIVE_KEY = 'features_input_alternative'
 _FALLBACK_DEFAULT_OUTPUT_ALTERNATIVE_KEY = 'default_output_alternative'
 
 
-def build_standardized_signature_def(
-    input_tensors, output_tensors, problem_type):
+def build_standardized_signature_def(input_tensors, output_tensors,
+                                     problem_type):
   """Build a SignatureDef using problem type and input and output Tensors.
 
   Note that this delegates the actual creation of the signatures to methods in
@@ -116,8 +116,8 @@ def build_standardized_signature_def(
     (_, predictions), = output_tensors.items()
     return signature_def_utils.regression_signature_def(examples, predictions)
   else:
-    return signature_def_utils.predict_signature_def(
-        input_tensors, output_tensors)
+    return signature_def_utils.predict_signature_def(input_tensors,
+                                                     output_tensors)
 
 
 def _get_classification_scores(output_tensors):
@@ -139,17 +139,15 @@ def _is_classification_problem(problem_type, input_tensors, output_tensors):
   classes = _get_classification_classes(output_tensors)
   scores = _get_classification_scores(output_tensors)
   return ((problem_type == constants.ProblemType.CLASSIFICATION or
-           problem_type == constants.ProblemType.LOGISTIC_REGRESSION)
-          and len(input_tensors) == 1
-          and (classes is not None or
-               scores is not None or
-               len(output_tensors) == 1))
+           problem_type == constants.ProblemType.LOGISTIC_REGRESSION) and
+          len(input_tensors) == 1 and
+          (classes is not None or scores is not None or
+           len(output_tensors) == 1))
 
 
 def _is_regression_problem(problem_type, input_tensors, output_tensors):
-  return (problem_type == constants.ProblemType.LINEAR_REGRESSION
-          and len(input_tensors) == 1
-          and len(output_tensors) == 1)
+  return (problem_type == constants.ProblemType.LINEAR_REGRESSION and
+          len(input_tensors) == 1 and len(output_tensors) == 1)
 
 
 def get_input_alternatives(input_ops):
@@ -177,9 +175,7 @@ def get_input_alternatives(input_ops):
   return input_alternatives, features
 
 
-def get_output_alternatives(
-    model_fn_ops,
-    default_output_alternative_key=None):
+def get_output_alternatives(model_fn_ops, default_output_alternative_key=None):
   """Obtain all output alternatives using the model_fn output and heuristics.
 
   Args:
@@ -218,8 +214,10 @@ def get_output_alternatives(
       default_outputs = {prediction_key.PredictionKey.GENERIC: default_outputs}
     actual_default_output_alternative_key = (
         _FALLBACK_DEFAULT_OUTPUT_ALTERNATIVE_KEY)
-    output_alternatives = {actual_default_output_alternative_key:
-                           (default_problem_type, default_outputs)}
+    output_alternatives = {
+        actual_default_output_alternative_key: (default_problem_type,
+                                                default_outputs)
+    }
     return output_alternatives, actual_default_output_alternative_key
 
   if default_output_alternative_key:
@@ -246,13 +244,12 @@ def build_all_signature_defs(input_alternatives, output_alternatives,
                              actual_default_output_alternative_key):
   """Build `SignatureDef`s from all pairs of input and output alternatives."""
 
-  signature_def_map = {
-      ('%s:%s' % (input_key, output_key or 'None')):
-      build_standardized_signature_def(
-          inputs, outputs, problem_type)
-      for input_key, inputs in input_alternatives.items()
-      for output_key, (problem_type, outputs)
-      in output_alternatives.items()}
+  signature_def_map = {('%s:%s' % (input_key, output_key or 'None')):
+                       build_standardized_signature_def(inputs, outputs,
+                                                        problem_type)
+                       for input_key, inputs in input_alternatives.items()
+                       for output_key, (problem_type,
+                                        outputs) in output_alternatives.items()}
 
   # Add the default SignatureDef
   default_inputs = input_alternatives.get(DEFAULT_INPUT_ALTERNATIVE_KEY)
@@ -263,8 +260,8 @@ def build_all_signature_defs(input_alternatives, output_alternatives,
   (default_problem_type, default_outputs) = (
       output_alternatives[actual_default_output_alternative_key])
   signature_def_map[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = (
-      build_standardized_signature_def(
-          default_inputs, default_outputs, default_problem_type))
+      build_standardized_signature_def(default_inputs, default_outputs,
+                                       default_problem_type))
 
   return signature_def_map
 
@@ -308,9 +305,8 @@ def get_timestamped_export_dir(export_dir_base):
       return export_dir
     time.sleep(1)
     attempts += 1
-    logging.warn(
-        'Export directory {} already exists; retrying (attempt {}/{})'.format(
-            export_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS))
+    logging.warn('Export directory {} already exists; retrying (attempt {}/{})'.
+                 format(export_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS))
   raise RuntimeError('Failed to obtain a unique export directory name after '
                      '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS))
 
@@ -330,8 +326,7 @@ def get_temp_export_dir(timestamped_export_dir):
   """
   (dirname, basename) = os.path.split(timestamped_export_dir)
   temp_export_dir = os.path.join(
-      compat.as_bytes(dirname),
-      compat.as_bytes('temp-{}'.format(basename)))
+      compat.as_bytes(dirname), compat.as_bytes('temp-{}'.format(basename)))
   return temp_export_dir
 
 
@@ -357,8 +352,8 @@ def get_most_recent_export(export_dir_base):
     A gc.Path, with is just a namedtuple of (path, export_version).
   """
   select_filter = gc.largest_export_versions(1)
-  results = select_filter(gc.get_paths(export_dir_base,
-                                       parser=_export_version_parser))
+  results = select_filter(
+      gc.get_paths(export_dir_base, parser=_export_version_parser))
   return next(iter(results or []), None)
 
 
@@ -378,8 +373,8 @@ def garbage_collect_exports(export_dir_base, exports_to_keep):
 
   keep_filter = gc.largest_export_versions(exports_to_keep)
   delete_filter = gc.negation(keep_filter)
-  for p in delete_filter(gc.get_paths(export_dir_base,
-                                      parser=_export_version_parser)):
+  for p in delete_filter(
+      gc.get_paths(export_dir_base, parser=_export_version_parser)):
     try:
       gfile.DeleteRecursively(p.path)
     except errors_impl.NotFoundError as e:
@@ -416,10 +411,7 @@ def make_export_strategy(serving_input_fn,
     An ExportStrategy that can be passed to the Experiment constructor.
   """
 
-  def export_fn(estimator,
-                export_dir_base,
-                checkpoint_path=None
-               ):
+  def export_fn(estimator, export_dir_base, checkpoint_path=None):
     """Exports the given Estimator as a SavedModel.
 
     Args:
@@ -512,3 +504,128 @@ def make_parsing_export_strategy(feature_columns,
       assets_extra=assets_extra,
       as_text=as_text,
       exports_to_keep=exports_to_keep)
+
+
+def _default_compare_fn(curr_best_eval_result, cand_eval_result):
+  """Compares two evaluation results and returns true if the 2nd one is better.
+
+  Both evaluation results should have the values for MetricKey.LOSS, which are
+  used for comparison.
+
+  Args:
+    curr_best_eval_result: current best eval metrics.
+    cand_eval_result: candidate eval metrics.
+
+  Returns:
+    True if cand_eval_result is better.
+
+  Raises:
+    ValueError: If input eval result is None or no loss is available.
+  """
+  default_key = metric_key.MetricKey.LOSS
+  if not curr_best_eval_result or default_key not in curr_best_eval_result:
+    raise ValueError(
+        'curr_best_eval_result cannot be empty or no loss is found in it.')
+
+  if not cand_eval_result or default_key not in cand_eval_result:
+    raise ValueError(
+        'cand_eval_result cannot be empty or no loss is found in it.')
+
+  return curr_best_eval_result[default_key] > cand_eval_result[default_key]
+
+
+class BestModelSelector(object):
+  """A helper that keeps track of export selection candidates."""
+
+  def __init__(self, compare_fn=None):
+    """Constructor of this class.
+
+    Args:
+      compare_fn: a function that returns true if the candidate is better than
+        the current best model.
+    """
+    self._best_eval_result = None
+    self._compare_fn = compare_fn or _default_compare_fn
+
+  def update(self, checkpoint_path, eval_result):
+    """Records a given checkpoint and exports if this is the best model.
+
+    Args:
+      checkpoint_path: the checkpoint path to export.
+      eval_result: a dictionary which is usually generated in evaluation runs.
+        By default, eval_results contains 'loss' field.
+
+    Returns:
+      A string representing the path to the checkpoint to be exported.
+      A dictionary of the same type of eval_result.
+
+    Raises:
+      ValueError: if checkpoint path is empty.
+      ValueError: if eval_results is None object.
+    """
+    if not checkpoint_path:
+      raise ValueError('Checkpoint path is empty.')
+    if eval_result is None:
+      raise ValueError('%s has empty evaluation results.', checkpoint_path)
+
+    if (self._best_eval_result is None or
+        self._compare_fn(self._best_eval_result, eval_result)):
+      self._best_eval_result = eval_result
+      return checkpoint_path, eval_result
+    else:
+      return '', None
+
+
+def make_best_model_export_strategy(serving_input_fn,
+                                    exports_to_keep=1,
+                                    compare_fn=None,
+                                    default_output_alternative_key=None):
+  """Creates an custom ExportStrategy for use with tf.contrib.learn.Experiment.
+
+  Args:
+    serving_input_fn: a function that takes no arguments and returns an
+      `InputFnOps`.
+    exports_to_keep: an integer indicating how many historical best models need
+      to be preserved.
+    compare_fn: a function that select the 'best' candidate from a dictionary
+        of evaluation result keyed by corresponding checkpoint path.
+    default_output_alternative_key: the key for default serving signature for
+        multi-headed inference graphs.
+
+  Returns:
+    An ExportStrategy that can be passed to the Experiment constructor.
+  """
+  best_model_export_strategy = make_export_strategy(
+      serving_input_fn,
+      exports_to_keep=exports_to_keep,
+      default_output_alternative_key=default_output_alternative_key)
+
+  best_model_selector = BestModelSelector(compare_fn)
+
+  def export_fn(estimator, export_dir_base, checkpoint_path, eval_result=None):
+    """Exports the given Estimator as a SavedModel.
+
+    Args:
+      estimator: the Estimator to export.
+      export_dir_base: A string containing a directory to write the exported
+        graph and checkpoints.
+      checkpoint_path: The checkpoint path to export.  If None (the default),
+        the most recent checkpoint found within the model directory is chosen.
+      eval_result: placehold args matching the call signature of ExportStrategy.
+
+    Returns:
+      The string path to the exported directory.
+    """
+
+    export_checkpoint_path, export_eval_result = best_model_selector.update(
+        checkpoint_path, eval_result)
+
+    if export_checkpoint_path and export_eval_result is not None:
+      checkpoint_base = os.path.basename(export_checkpoint_path)
+      export_dir = os.path.join(export_dir_base, checkpoint_base)
+      return best_model_export_strategy.export(
+          estimator, export_dir, export_checkpoint_path, export_eval_result)
+    else:
+      return ''
+
+  return export_strategy.ExportStrategy('best_model', export_fn)
diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
index 9e778ab72a..66bca9c0f5 100644
--- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
+++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py
@@ -24,6 +24,7 @@ import time
 from tensorflow.contrib.layers.python.layers import feature_column as fc
 from tensorflow.contrib.learn.python.learn import export_strategy as export_strategy_lib
 from tensorflow.contrib.learn.python.learn.estimators import constants
+from tensorflow.contrib.learn.python.learn.estimators import estimator as core_estimator
 from tensorflow.contrib.learn.python.learn.estimators import model_fn
 from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
 from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils
@@ -40,18 +41,43 @@ from tensorflow.python.saved_model import signature_def_utils
 from tensorflow.python.util import compat
 
 
+class TestEstimator(core_estimator.Estimator):
+
+  def __init__(self, *args, **kwargs):
+    super(TestEstimator, self).__init__(*args, **kwargs)
+    self.last_exported_checkpoint = ""
+    self.last_exported_dir = ""
+
+  # @Override
+  def export_savedmodel(self,
+                        export_dir,
+                        serving_input_fn,
+                        default_output_alternative_key=None,
+                        assets_extra=None,
+                        as_text=False,
+                        checkpoint_path=None):
+
+    if not os.path.exists(export_dir):
+      os.makedirs(export_dir)
+
+    open(os.path.join(export_dir, "placeholder.txt"), "a").close()
+
+    self.last_exported_checkpoint = checkpoint_path
+    self.last_exported_dir = export_dir
+
+    return export_dir
+
+
 class SavedModelExportUtilsTest(test.TestCase):
 
   def test_build_standardized_signature_def_regression(self):
     input_tensors = {
         "input-1":
-            array_ops.placeholder(
-                dtypes.float32, 1, name="input-tensor-1")
+            array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1")
     }
     output_tensors = {
         "output-1":
-            array_ops.placeholder(
-                dtypes.float32, 1, name="output-tensor-1")
+            array_ops.placeholder(dtypes.float32, 1, name="output-tensor-1")
     }
     problem_type = constants.ProblemType.LINEAR_REGRESSION
     actual_signature_def = (
@@ -61,10 +87,9 @@ class SavedModelExportUtilsTest(test.TestCase):
     shape = tensor_shape_pb2.TensorShapeProto(
         dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
     dtype = types_pb2.DataType.Value("DT_FLOAT")
-    expected_signature_def.inputs[
-        signature_constants.REGRESS_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(
-                name="input-tensor-1:0", dtype=dtype, tensor_shape=shape))
+    expected_signature_def.inputs[signature_constants.REGRESS_INPUTS].CopyFrom(
+        meta_graph_pb2.TensorInfo(
+            name="input-tensor-1:0", dtype=dtype, tensor_shape=shape))
     expected_signature_def.outputs[
         signature_constants.REGRESS_OUTPUTS].CopyFrom(
             meta_graph_pb2.TensorInfo(
@@ -77,13 +102,11 @@ class SavedModelExportUtilsTest(test.TestCase):
     """Tests classification with one output tensor."""
     input_tensors = {
         "input-1":
-            array_ops.placeholder(
-                dtypes.float32, 1, name="input-tensor-1")
+            array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1")
     }
     output_tensors = {
         "output-1":
-            array_ops.placeholder(
-                dtypes.string, 1, name="output-tensor-1")
+            array_ops.placeholder(dtypes.string, 1, name="output-tensor-1")
     }
     problem_type = constants.ProblemType.CLASSIFICATION
     actual_signature_def = (
@@ -94,14 +117,14 @@ class SavedModelExportUtilsTest(test.TestCase):
         dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
     dtype_float = types_pb2.DataType.Value("DT_FLOAT")
     dtype_string = types_pb2.DataType.Value("DT_STRING")
-    expected_signature_def.inputs[
-        signature_constants.CLASSIFY_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(
-                name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
+    expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom(
+        meta_graph_pb2.TensorInfo(
+            name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
     expected_signature_def.outputs[
         signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom(
             meta_graph_pb2.TensorInfo(
-                name="output-tensor-1:0", dtype=dtype_string,
+                name="output-tensor-1:0",
+                dtype=dtype_string,
                 tensor_shape=shape))
 
     expected_signature_def.method_name = (
@@ -112,8 +135,7 @@ class SavedModelExportUtilsTest(test.TestCase):
     """Tests multiple output tensors that include classes and probabilities."""
     input_tensors = {
         "input-1":
-            array_ops.placeholder(
-                dtypes.float32, 1, name="input-tensor-1")
+            array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1")
     }
     output_tensors = {
         "classes":
@@ -136,19 +158,20 @@ class SavedModelExportUtilsTest(test.TestCase):
         dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
     dtype_float = types_pb2.DataType.Value("DT_FLOAT")
     dtype_string = types_pb2.DataType.Value("DT_STRING")
-    expected_signature_def.inputs[
-        signature_constants.CLASSIFY_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(
-                name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
+    expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom(
+        meta_graph_pb2.TensorInfo(
+            name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
     expected_signature_def.outputs[
         signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom(
             meta_graph_pb2.TensorInfo(
-                name="output-tensor-classes:0", dtype=dtype_string,
+                name="output-tensor-classes:0",
+                dtype=dtype_string,
                 tensor_shape=shape))
     expected_signature_def.outputs[
         signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
             meta_graph_pb2.TensorInfo(
-                name="output-tensor-proba:0", dtype=dtype_float,
+                name="output-tensor-proba:0",
+                dtype=dtype_float,
                 tensor_shape=shape))
 
     expected_signature_def.method_name = (
@@ -159,8 +182,7 @@ class SavedModelExportUtilsTest(test.TestCase):
     """Tests multiple output tensors that include classes and scores."""
     input_tensors = {
         "input-1":
-            array_ops.placeholder(
-                dtypes.float32, 1, name="input-tensor-1")
+            array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1")
     }
     output_tensors = {
         "classes":
@@ -182,19 +204,20 @@ class SavedModelExportUtilsTest(test.TestCase):
         dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
     dtype_float = types_pb2.DataType.Value("DT_FLOAT")
     dtype_string = types_pb2.DataType.Value("DT_STRING")
-    expected_signature_def.inputs[
-        signature_constants.CLASSIFY_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(
-                name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
+    expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom(
+        meta_graph_pb2.TensorInfo(
+            name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
     expected_signature_def.outputs[
         signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom(
             meta_graph_pb2.TensorInfo(
-                name="output-tensor-classes:0", dtype=dtype_string,
+                name="output-tensor-classes:0",
+                dtype=dtype_string,
                 tensor_shape=shape))
     expected_signature_def.outputs[
         signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
             meta_graph_pb2.TensorInfo(
-                name="output-tensor-scores:0", dtype=dtype_float,
+                name="output-tensor-scores:0",
+                dtype=dtype_float,
                 tensor_shape=shape))
 
     expected_signature_def.method_name = (
@@ -205,8 +228,7 @@ class SavedModelExportUtilsTest(test.TestCase):
     """Tests classification without classes tensor."""
     input_tensors = {
         "input-1":
-            array_ops.placeholder(
-                dtypes.float32, 1, name="input-tensor-1")
+            array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1")
     }
     output_tensors = {
         "probabilities":
@@ -224,14 +246,14 @@ class SavedModelExportUtilsTest(test.TestCase):
     shape = tensor_shape_pb2.TensorShapeProto(
         dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
     dtype_float = types_pb2.DataType.Value("DT_FLOAT")
-    expected_signature_def.inputs[
-        signature_constants.CLASSIFY_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(
-                name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
+    expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom(
+        meta_graph_pb2.TensorInfo(
+            name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
     expected_signature_def.outputs[
         signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
             meta_graph_pb2.TensorInfo(
-                name="output-tensor-proba:0", dtype=dtype_float,
+                name="output-tensor-proba:0",
+                dtype=dtype_float,
                 tensor_shape=shape))
 
     expected_signature_def.method_name = (
@@ -246,8 +268,7 @@ class SavedModelExportUtilsTest(test.TestCase):
     """
     input_tensors = {
         "input-1":
-            array_ops.placeholder(
-                dtypes.float32, 1, name="input-tensor-1")
+            array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1")
     }
     output_tensors = {
         "classes":
@@ -268,14 +289,14 @@ class SavedModelExportUtilsTest(test.TestCase):
     shape = tensor_shape_pb2.TensorShapeProto(
         dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
     dtype_float = types_pb2.DataType.Value("DT_FLOAT")
-    expected_signature_def.inputs[
-        signature_constants.CLASSIFY_INPUTS].CopyFrom(
-            meta_graph_pb2.TensorInfo(
-                name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
+    expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom(
+        meta_graph_pb2.TensorInfo(
+            name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
     expected_signature_def.outputs[
         signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
             meta_graph_pb2.TensorInfo(
-                name="output-tensor-scores:0", dtype=dtype_float,
+                name="output-tensor-scores:0",
+                dtype=dtype_float,
                 tensor_shape=shape))
 
     expected_signature_def.method_name = (
@@ -290,8 +311,7 @@ class SavedModelExportUtilsTest(test.TestCase):
     """
     input_tensors = {
         "input-1":
-            array_ops.placeholder(
-                dtypes.float32, 1, name="input-tensor-1")
+            array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1")
     }
     output_tensors = {
         "classes":
@@ -310,17 +330,18 @@ class SavedModelExportUtilsTest(test.TestCase):
         dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
     dtype_int64 = types_pb2.DataType.Value("DT_INT64")
     dtype_float = types_pb2.DataType.Value("DT_FLOAT")
-    expected_signature_def.inputs[
-        "input-1"].CopyFrom(
-            meta_graph_pb2.TensorInfo(
-                name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
+    expected_signature_def.inputs["input-1"].CopyFrom(
+        meta_graph_pb2.TensorInfo(
+            name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape))
     expected_signature_def.outputs["classes"].CopyFrom(
         meta_graph_pb2.TensorInfo(
-            name="output-tensor-classes:0", dtype=dtype_int64,
+            name="output-tensor-classes:0",
+            dtype=dtype_int64,
             tensor_shape=shape))
     expected_signature_def.outputs["logits"].CopyFrom(
         meta_graph_pb2.TensorInfo(
-            name="output-tensor-logits:0", dtype=dtype_float,
+            name="output-tensor-logits:0",
+            dtype=dtype_float,
             tensor_shape=shape))
 
     expected_signature_def.method_name = (
@@ -379,8 +400,9 @@ class SavedModelExportUtilsTest(test.TestCase):
   def test_get_output_alternatives_single_no_default(self):
     prediction_tensor = constant_op.constant(["bogus"])
     provided_output_alternatives = {
-        "head-1": (constants.ProblemType.LINEAR_REGRESSION,
-                   {"output": prediction_tensor}),
+        "head-1": (constants.ProblemType.LINEAR_REGRESSION, {
+            "output": prediction_tensor
+        }),
     }
     model_fn_ops = model_fn.ModelFnOps(
         model_fn.ModeKeys.INFER,
@@ -390,10 +412,11 @@ class SavedModelExportUtilsTest(test.TestCase):
     output_alternatives, _ = saved_model_export_utils.get_output_alternatives(
         model_fn_ops)
 
-    self.assertEqual({"head-1":
-                      (constants.ProblemType.LINEAR_REGRESSION,
-                       {"output": prediction_tensor})},
-                     output_alternatives)
+    self.assertEqual({
+        "head-1": (constants.ProblemType.LINEAR_REGRESSION, {
+            "output": prediction_tensor
+        })
+    }, output_alternatives)
 
   def test_get_output_alternatives_multi_no_default(self):
     provided_output_alternatives = {
@@ -424,10 +447,11 @@ class SavedModelExportUtilsTest(test.TestCase):
     output_alternatives, _ = saved_model_export_utils.get_output_alternatives(
         model_fn_ops)
 
-    self.assertEqual(
-        {"default_output_alternative": (constants.ProblemType.UNSPECIFIED, {
-            "some_output": prediction_tensor})},
-        output_alternatives)
+    self.assertEqual({
+        "default_output_alternative": (constants.ProblemType.UNSPECIFIED, {
+            "some_output": prediction_tensor
+        })
+    }, output_alternatives)
 
   def test_get_output_alternatives_empty_provided_with_default(self):
     prediction_tensor = constant_op.constant(["bogus"])
@@ -452,10 +476,11 @@ class SavedModelExportUtilsTest(test.TestCase):
     output_alternatives, _ = saved_model_export_utils.get_output_alternatives(
         model_fn_ops)
 
-    self.assertEqual(
-        {"default_output_alternative": (constants.ProblemType.UNSPECIFIED, {
-            "some_output": prediction_tensor})},
-        output_alternatives)
+    self.assertEqual({
+        "default_output_alternative": (constants.ProblemType.UNSPECIFIED, {
+            "some_output": prediction_tensor
+        })
+    }, output_alternatives)
 
   def test_get_output_alternatives_implicit_single(self):
     prediction_tensor = constant_op.constant(["bogus"])
@@ -506,14 +531,14 @@ class SavedModelExportUtilsTest(test.TestCase):
 
     expected_signature_defs = {
         "serving_default":
-            signature_def_utils.regression_signature_def(input_example,
-                                                         output_1),
+            signature_def_utils.regression_signature_def(
+                input_example, output_1),
         "default_input_alternative:head-1":
-            signature_def_utils.regression_signature_def(input_example,
-                                                         output_1),
+            signature_def_utils.regression_signature_def(
+                input_example, output_1),
         "default_input_alternative:head-2":
-            signature_def_utils.classification_signature_def(input_example,
-                                                             output_2, None),
+            signature_def_utils.classification_signature_def(
+                input_example, output_2, None),
         "default_input_alternative:head-3":
             signature_def_utils.predict_signature_def({
                 "default input": input_example
@@ -624,17 +649,20 @@ class SavedModelExportUtilsTest(test.TestCase):
     (most_recent_export_dir, most_recent_export_version) = (
         saved_model_export_utils.get_most_recent_export(export_dir_base))
 
-    self.assertEqual(compat.as_bytes(export_dir_4),
-                     compat.as_bytes(most_recent_export_dir))
-    self.assertEqual(compat.as_bytes(export_dir_4),
-                     os.path.join(compat.as_bytes(export_dir_base),
-                                  compat.as_bytes(
-                                      str(most_recent_export_version))))
+    self.assertEqual(
+        compat.as_bytes(export_dir_4), compat.as_bytes(most_recent_export_dir))
+    self.assertEqual(
+        compat.as_bytes(export_dir_4),
+        os.path.join(
+            compat.as_bytes(export_dir_base),
+            compat.as_bytes(str(most_recent_export_version))))
 
   def test_make_export_strategy(self):
     """Only tests that an ExportStrategy instance is created."""
+
     def _serving_input_fn():
       return array_ops.constant([1]), None
+
     export_strategy = saved_model_export_utils.make_export_strategy(
         serving_input_fn=_serving_input_fn,
         default_output_alternative_key="default",
@@ -655,14 +683,61 @@ class SavedModelExportUtilsTest(test.TestCase):
     real_valued_col1 = fc.real_valued_column("real_valued_column1")
     bucketized_col1 = fc.bucketized_column(
         fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4])
-    feature_columns = [sparse_col, embedding_col, real_valued_col1,
-                       bucketized_col1]
+    feature_columns = [
+        sparse_col, embedding_col, real_valued_col1, bucketized_col1
+    ]
 
     export_strategy = saved_model_export_utils.make_parsing_export_strategy(
         feature_columns=feature_columns)
     self.assertTrue(
         isinstance(export_strategy, export_strategy_lib.ExportStrategy))
 
+  def test_make_best_model_export_strategy(self):
+    export_dir_base = tempfile.mkdtemp() + "export/"
+    gfile.MkDir(export_dir_base)
+
+    test_estimator = TestEstimator()
+    export_strategy = saved_model_export_utils.make_best_model_export_strategy(
+        serving_input_fn=None, exports_to_keep=3, compare_fn=None)
+
+    self.assertNotEqual("",
+                        export_strategy.export(test_estimator, export_dir_base,
+                                               "fake_ckpt_0", {"loss": 100}))
+    self.assertNotEqual("", test_estimator.last_exported_dir)
+    self.assertNotEqual("", test_estimator.last_exported_checkpoint)
+
+    self.assertEqual("",
+                     export_strategy.export(test_estimator, export_dir_base,
+                                            "fake_ckpt_1", {"loss": 101}))
+    self.assertEqual(test_estimator.last_exported_dir,
+                     os.path.join(export_dir_base, "fake_ckpt_0"))
+
+    self.assertNotEqual("",
+                        export_strategy.export(test_estimator, export_dir_base,
+                                               "fake_ckpt_2", {"loss": 10}))
+    self.assertEqual(test_estimator.last_exported_dir,
+                     os.path.join(export_dir_base, "fake_ckpt_2"))
+
+    self.assertEqual("",
+                     export_strategy.export(test_estimator, export_dir_base,
+                                            "fake_ckpt_3", {"loss": 20}))
+    self.assertEqual(test_estimator.last_exported_dir,
+                     os.path.join(export_dir_base, "fake_ckpt_2"))
+
+  def test_make_best_model_export_strategy_exceptions(self):
+    export_dir_base = tempfile.mkdtemp() + "export/"
+
+    test_estimator = TestEstimator()
+    export_strategy = saved_model_export_utils.make_best_model_export_strategy(
+        serving_input_fn=None, exports_to_keep=3, compare_fn=None)
+
+    with self.assertRaises(ValueError):
+      export_strategy.export(test_estimator, export_dir_base, "", {"loss": 200})
+
+    with self.assertRaises(ValueError):
+      export_strategy.export(test_estimator, export_dir_base, "fake_ckpt_1",
+                             None)
+
 
 def _create_test_export_dir(export_dir_base):
   export_dir = saved_model_export_utils.get_timestamped_export_dir(
-- 
GitLab


From ff35512845f434f46099a61c508cf5361e4bc251 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy 
Date: Wed, 30 Aug 2017 15:05:39 -0700
Subject: [PATCH 124/294] Add SQLite licenses to tensorflow distributable
 packages.

---
 tensorflow/tools/lib_package/BUILD | 2 ++
 tensorflow/tools/pip_package/BUILD | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index 494ddd2f5d..84d4e9859a 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -105,6 +105,7 @@ genrule(
         "@png_archive//:LICENSE",
         "@protobuf_archive//:LICENSE",
         "@snappy//:COPYING",
+        "@sqlite_archive//:LICENSE",
         "@zlib_archive//:zlib.h",
     ] + if_mkl([
         "//third_party/mkl:LICENSE",
@@ -140,6 +141,7 @@ genrule(
         "@png_archive//:LICENSE",
         "@protobuf_archive//:LICENSE",
         "@snappy//:COPYING",
+        "@sqlite_archive//:LICENSE",
         "@zlib_archive//:zlib.h",
     ] + if_mkl([
         "//third_party/mkl:LICENSE",
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index ae93cf210b..210b6643be 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -125,6 +125,7 @@ filegroup(
         "@protobuf_archive//:LICENSE",
         "@six_archive//:LICENSE",
         "@snappy//:COPYING",
+        "@sqlite_archive//:LICENSE",
         "@zlib_archive//:zlib.h",
         "@org_python_pypi_backports_weakref//:LICENSE",
     ] + if_mkl([
-- 
GitLab


From 6ee861444729063f7b0af8c2f8d37efcd854388e Mon Sep 17 00:00:00 2001
From: Jianwei Xie 
Date: Wed, 30 Aug 2017 15:49:43 -0700
Subject: [PATCH 125/294] Updates the docstring and model examples to use new
 style to specify metric_fn.

PiperOrigin-RevId: 167062007
---
 .../contrib/tpu/python/tpu/tpu_estimator.py      | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 485c884bb3..6748a76562 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -980,18 +980,20 @@ class TPUEstimator(estimator_lib.Estimator):
 
   Example (MNIST):
   ```
+  # The metric Fn which runs on CPU.
+  def metric_fn(labels, logits):
+    predictions = tf.argmax(logits, 1)
+    return {
+      'accuracy': tf.metrics.precision(
+          labels=labels, predictions=predictions),
+    }
+
+  # Your model Fn which runs on TPU.
   def model_fn(features, labels, mode, config, params):
     ...
     logits = ...
 
     if mode = tf.estimator.ModeKeys.EVAL:
-      def metric_fn(labels, logits):
-        predictions = tf.argmax(logits, 1)
-        return {
-          'precision': tf.metrics.precision(
-              labels=labels, predictions=predictions),
-        }
-
       return tpu_estimator.TPUEstimatorSpec(
           mode=mode,
           loss=loss,
-- 
GitLab


From 4ef8a4c996540ef795ae1942df7bb6370bb85d32 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy 
Date: Wed, 30 Aug 2017 15:58:39 -0700
Subject: [PATCH 126/294] Update sqlite license to unencumbered.

SQLite is public domain.
---
 tensorflow/tools/lib_package/BUILD | 2 --
 tensorflow/tools/pip_package/BUILD | 1 -
 third_party/sqlite.BUILD           | 4 ++--
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index 84d4e9859a..494ddd2f5d 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -105,7 +105,6 @@ genrule(
         "@png_archive//:LICENSE",
         "@protobuf_archive//:LICENSE",
         "@snappy//:COPYING",
-        "@sqlite_archive//:LICENSE",
         "@zlib_archive//:zlib.h",
     ] + if_mkl([
         "//third_party/mkl:LICENSE",
@@ -141,7 +140,6 @@ genrule(
         "@png_archive//:LICENSE",
         "@protobuf_archive//:LICENSE",
         "@snappy//:COPYING",
-        "@sqlite_archive//:LICENSE",
         "@zlib_archive//:zlib.h",
     ] + if_mkl([
         "//third_party/mkl:LICENSE",
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 210b6643be..ae93cf210b 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -125,7 +125,6 @@ filegroup(
         "@protobuf_archive//:LICENSE",
         "@six_archive//:LICENSE",
         "@snappy//:COPYING",
-        "@sqlite_archive//:LICENSE",
         "@zlib_archive//:zlib.h",
         "@org_python_pypi_backports_weakref//:LICENSE",
     ] + if_mkl([
diff --git a/third_party/sqlite.BUILD b/third_party/sqlite.BUILD
index f593b71542..9840d7b151 100644
--- a/third_party/sqlite.BUILD
+++ b/third_party/sqlite.BUILD
@@ -2,9 +2,9 @@
 #   Sqlite3 library. Provides utilities for interacting
 #   with sqlite3 databases.
 
-licenses(["notice"])  # BSD/MIT-like license
+licenses(["unencumbered"])  # Public Domain
 
-exports_files(["LICENSE"])
+# exports_files(["LICENSE"])
 
 cc_library(
     name = "sqlite",
-- 
GitLab


From 2e3d2f97de90508efd2729d90fbd74fb6d770961 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 16:01:58 -0700
Subject: [PATCH 127/294] [TF:XLA] Don't pass opcode separately in two HLO
 visitor functions.

HandleElementwiseUnary and HandleElementwiseBinary never use this parameter and it is accessible from the HLO instruction anyway.

No functional change.

PiperOrigin-RevId: 167063592
---
 .../compiler/xla/service/dfs_hlo_visitor.cc   | 10 ++--
 .../compiler/xla/service/dfs_hlo_visitor.h    | 57 +++++++++----------
 .../service/dfs_hlo_visitor_with_default.h    |  6 +-
 .../compiler/xla/service/hlo_cost_analysis.cc |  6 +-
 .../compiler/xla/service/hlo_cost_analysis.h  |  5 +-
 .../compiler/xla/service/hlo_verifier.cc      |  6 +-
 6 files changed, 40 insertions(+), 50 deletions(-)

diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc
index 669ebb55be..6efd0bcee5 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc
@@ -24,16 +24,14 @@ limitations under the License.
 
 namespace xla {
 
-Status DfsHloVisitor::HandleElementwiseUnary(HloInstruction* hlo,
-                                             HloOpcode opcode) {
+Status DfsHloVisitor::HandleElementwiseUnary(HloInstruction* hlo) {
   return Unimplemented("DfsHloVisitor::HandleElementwiseUnary: %s",
-                       HloOpcodeString(opcode).c_str());
+                       HloOpcodeString(hlo->opcode()).c_str());
 }
 
-Status DfsHloVisitor::HandleElementwiseBinary(HloInstruction* hlo,
-                                              HloOpcode opcode) {
+Status DfsHloVisitor::HandleElementwiseBinary(HloInstruction* hlo) {
   return Unimplemented("DfsHloVisitor::HandleElementwiseBinary: %s",
-                       HloOpcodeString(opcode).c_str());
+                       HloOpcodeString(hlo->opcode()).c_str());
 }
 
 DfsHloVisitor::VisitState DfsHloVisitor::GetVisitState(
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index a1a3a882c7..2f21043a1d 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -63,37 +63,37 @@ class DfsHloVisitor {
   // These routines are self-descriptive, see class comment for usage
   // information.
 
-  virtual Status HandleElementwiseUnary(HloInstruction* hlo, HloOpcode opcode);
-  virtual Status HandleElementwiseBinary(HloInstruction* hlo, HloOpcode opcode);
+  virtual Status HandleElementwiseUnary(HloInstruction* hlo);
+  virtual Status HandleElementwiseBinary(HloInstruction* hlo);
   virtual Status HandleClamp(HloInstruction* clamp, HloInstruction* min,
                              HloInstruction* arg, HloInstruction* max) = 0;
   virtual Status HandleSelect(HloInstruction* select, HloInstruction* pred,
                               HloInstruction* on_true,
                               HloInstruction* on_false) = 0;
   virtual Status HandleMaximum(HloInstruction* maximum) {
-    return HandleElementwiseBinary(maximum, HloOpcode::kMaximum);
+    return HandleElementwiseBinary(maximum);
   }
   virtual Status HandleMinimum(HloInstruction* minimum) {
-    return HandleElementwiseBinary(minimum, HloOpcode::kMinimum);
+    return HandleElementwiseBinary(minimum);
   }
   virtual Status HandleConcatenate(
       HloInstruction* concatenate,
       tensorflow::gtl::ArraySlice operands) = 0;
   virtual Status HandleConvert(HloInstruction* convert) {
-    return HandleElementwiseUnary(convert, HloOpcode::kConvert);
+    return HandleElementwiseUnary(convert);
   }
   virtual Status HandleCopy(HloInstruction* copy) {
-    return HandleElementwiseUnary(copy, HloOpcode::kCopy);
+    return HandleElementwiseUnary(copy);
   }
   virtual Status HandleMultiply(HloInstruction* multiply, HloInstruction* lhs,
                                 HloInstruction* rhs) {
-    return HandleElementwiseBinary(multiply, HloOpcode::kMultiply);
+    return HandleElementwiseBinary(multiply);
   }
   virtual Status HandleDot(HloInstruction* dot, HloInstruction* lhs,
                            HloInstruction* rhs) = 0;
   virtual Status HandlePower(HloInstruction* power, HloInstruction* lhs,
                              HloInstruction* rhs) {
-    return HandleElementwiseBinary(power, HloOpcode::kPower);
+    return HandleElementwiseBinary(power);
   }
   virtual Status HandleConvolution(HloInstruction* convolution,
                                    HloInstruction* lhs, HloInstruction* rhs,
@@ -101,73 +101,72 @@ class DfsHloVisitor {
   virtual Status HandleCrossReplicaSum(HloInstruction* crs) = 0;
   virtual Status HandleCompare(HloInstruction* compare, HloOpcode opcode,
                                HloInstruction* lhs, HloInstruction* rhs) {
-    return HandleElementwiseBinary(compare, opcode);
+    return HandleElementwiseBinary(compare);
   }
   virtual Status HandleAdd(HloInstruction* add, HloInstruction* lhs,
                            HloInstruction* rhs) {
-    return HandleElementwiseBinary(add, HloOpcode::kAdd);
+    return HandleElementwiseBinary(add);
   }
   virtual Status HandleDivide(HloInstruction* divide, HloInstruction* lhs,
                               HloInstruction* rhs) {
-    return HandleElementwiseBinary(divide, HloOpcode::kDivide);
+    return HandleElementwiseBinary(divide);
   }
   virtual Status HandleRemainder(HloInstruction* remainder, HloInstruction* lhs,
                                  HloInstruction* rhs) {
-    return HandleElementwiseBinary(remainder, HloOpcode::kRemainder);
+    return HandleElementwiseBinary(remainder);
   }
   virtual Status HandleSubtract(HloInstruction* subtract, HloInstruction* lhs,
                                 HloInstruction* rhs) {
-    return HandleElementwiseBinary(subtract, HloOpcode::kSubtract);
+    return HandleElementwiseBinary(subtract);
   }
   virtual Status HandleAbs(HloInstruction* abs, HloInstruction* operand) {
-    return HandleElementwiseUnary(abs, HloOpcode::kAbs);
+    return HandleElementwiseUnary(abs);
   }
   virtual Status HandleSign(HloInstruction* sign, HloInstruction* operand) {
-    return HandleElementwiseUnary(sign, HloOpcode::kSign);
+    return HandleElementwiseUnary(sign);
   }
   virtual Status HandleNegate(HloInstruction* negate, HloInstruction* operand) {
-    return HandleElementwiseUnary(negate, HloOpcode::kNegate);
+    return HandleElementwiseUnary(negate);
   }
   virtual Status HandleExp(HloInstruction* exp, HloInstruction* operand) {
-    return HandleElementwiseUnary(exp, HloOpcode::kExp);
+    return HandleElementwiseUnary(exp);
   }
   virtual Status HandleFloor(HloInstruction* floor, HloInstruction* operand) {
-    return HandleElementwiseUnary(floor, HloOpcode::kFloor);
+    return HandleElementwiseUnary(floor);
   }
   virtual Status HandleCeil(HloInstruction* ceil, HloInstruction* operand) {
-    return HandleElementwiseUnary(ceil, HloOpcode::kCeil);
+    return HandleElementwiseUnary(ceil);
   }
   virtual Status HandleLog(HloInstruction* log, HloInstruction* operand) {
-    return HandleElementwiseUnary(log, HloOpcode::kLog);
+    return HandleElementwiseUnary(log);
   }
   virtual Status HandleCos(HloInstruction* cos, HloInstruction* operand) {
-    return HandleElementwiseUnary(cos, HloOpcode::kCos);
+    return HandleElementwiseUnary(cos);
   }
   virtual Status HandleSin(HloInstruction* sin, HloInstruction* operand) {
-    return HandleElementwiseUnary(sin, HloOpcode::kSin);
+    return HandleElementwiseUnary(sin);
   }
   virtual Status HandleTanh(HloInstruction* tanh, HloInstruction* operand) {
-    return HandleElementwiseUnary(tanh, HloOpcode::kTanh);
+    return HandleElementwiseUnary(tanh);
   }
   virtual Status HandleIsFinite(HloInstruction* is_finite,
                                 HloInstruction* operand) {
-    return HandleElementwiseUnary(is_finite, HloOpcode::kIsFinite);
+    return HandleElementwiseUnary(is_finite);
   }
   virtual Status HandleLogicalAnd(HloInstruction* logical_and,
                                   HloInstruction* lhs, HloInstruction* rhs) {
-    return HandleElementwiseBinary(logical_and, HloOpcode::kLogicalAnd);
+    return HandleElementwiseBinary(logical_and);
   }
   virtual Status HandleLogicalNot(HloInstruction* logical_not,
                                   HloInstruction* operand) {
-    return HandleElementwiseUnary(logical_not, HloOpcode::kLogicalNot);
+    return HandleElementwiseUnary(logical_not);
   }
   virtual Status HandleLogicalOr(HloInstruction* logical_or,
                                  HloInstruction* lhs, HloInstruction* rhs) {
-    return HandleElementwiseBinary(logical_or, HloOpcode::kLogicalOr);
+    return HandleElementwiseBinary(logical_or);
   }
   virtual Status HandleReducePrecision(HloInstruction* reduce_precision) {
-    return HandleElementwiseUnary(reduce_precision,
-                                  HloOpcode::kReducePrecision);
+    return HandleElementwiseUnary(reduce_precision);
   }
 
   virtual Status HandleInfeed(HloInstruction* infeed) = 0;
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
index 10f8ae9b04..a5fe120598 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
@@ -41,12 +41,10 @@ class DfsHloVisitorWithDefault : public DfsHloVisitor {
   // Default action performed on HloInstruction.
   virtual Status DefaultAction(HloInstruction* hlo_instruction) = 0;
 
-  Status HandleElementwiseUnary(HloInstruction* hlo,
-                                HloOpcode opcode) override {
+  Status HandleElementwiseUnary(HloInstruction* hlo) override {
     return DefaultAction(hlo);
   }
-  Status HandleElementwiseBinary(HloInstruction* hlo,
-                                 HloOpcode opcode) override {
+  Status HandleElementwiseBinary(HloInstruction* hlo) override {
     return DefaultAction(hlo);
   }
 
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
index 9dbde0ec24..f6b764732b 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
@@ -118,13 +118,11 @@ Status HloCostAnalysis::HandleElementwiseOp(HloInstruction* hlo_instruction) {
   }
 }
 
-Status HloCostAnalysis::HandleElementwiseUnary(HloInstruction* hlo,
-                                               HloOpcode opcode) {
+Status HloCostAnalysis::HandleElementwiseUnary(HloInstruction* hlo) {
   return HandleElementwiseOp(hlo);
 }
 
-Status HloCostAnalysis::HandleElementwiseBinary(HloInstruction* hlo,
-                                                HloOpcode opcode) {
+Status HloCostAnalysis::HandleElementwiseBinary(HloInstruction* hlo) {
   return HandleElementwiseOp(hlo);
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
index 6d8fdfa64b..eeb3d4edd1 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
@@ -49,9 +49,8 @@ class HloCostAnalysis : public DfsHloVisitor {
   using ShapeSizeFunction = std::function;
   explicit HloCostAnalysis(const ShapeSizeFunction& shape_size);
 
-  Status HandleElementwiseUnary(HloInstruction* hlo, HloOpcode opcode) override;
-  Status HandleElementwiseBinary(HloInstruction* hlo,
-                                 HloOpcode opcode) override;
+  Status HandleElementwiseUnary(HloInstruction* hlo) override;
+  Status HandleElementwiseBinary(HloInstruction* hlo) override;
   Status HandleConstant(HloInstruction* constant,
                         const Literal& literal) override;
   Status HandleGetTupleElement(HloInstruction* get_tuple_element,
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 9ba2d54d02..eb6fe5d2e3 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -32,13 +32,11 @@ class ShapeVerifier : public DfsHloVisitor {
       const std::function& shape_size_fn)
       : shape_size_fn_(shape_size_fn) {}
 
-  Status HandleElementwiseUnary(HloInstruction* hlo,
-                                HloOpcode opcode) override {
+  Status HandleElementwiseUnary(HloInstruction* hlo) override {
     return CheckUnaryShape(hlo);
   }
 
-  Status HandleElementwiseBinary(HloInstruction* hlo,
-                                 HloOpcode opcode) override {
+  Status HandleElementwiseBinary(HloInstruction* hlo) override {
     return CheckBinaryShape(hlo);
   }
 
-- 
GitLab


From a4faf517b42f4bef8c9661f9142ba640aaeb25a2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 16:40:26 -0700
Subject: [PATCH 128/294] GroupByDynamicWindowDataset: A new Dataset where the
 window size is a function of the key

PiperOrigin-RevId: 167068616
---
 tensorflow/contrib/data/__init__.py           |   2 +
 .../python/kernel_tests/bucketing_test.py     | 115 ++++++--
 .../contrib/data/python/ops/dataset_ops.py    | 273 ++++++++++++------
 .../kernels/group_by_window_dataset_op.cc     |  78 ++++-
 .../core/ops/compat/ops_history.v1.pbtxt      |  13 +-
 tensorflow/core/ops/dataset_ops.cc            |   5 +-
 tensorflow/core/ops/ops.pbtxt                 |  13 +-
 7 files changed, 359 insertions(+), 140 deletions(-)

diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index 5308ab64ac..1c0a5288f7 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -22,6 +22,7 @@
 
 @@read_batch_features
 @@rejection_resample
+@@group_by_window
 """
 
 from __future__ import absolute_import
@@ -31,6 +32,7 @@ from __future__ import print_function
 # pylint: disable=unused-import
 from tensorflow.contrib.data.python.ops.dataset_ops import Dataset
 from tensorflow.contrib.data.python.ops.dataset_ops import FixedLengthRecordDataset
+from tensorflow.contrib.data.python.ops.dataset_ops import group_by_window
 from tensorflow.contrib.data.python.ops.dataset_ops import Iterator
 from tensorflow.contrib.data.python.ops.dataset_ops import read_batch_features
 from tensorflow.contrib.data.python.ops.dataset_ops import rejection_resample
diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
index 71df1ee0a5..0111aae103 100644
--- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py
@@ -37,7 +37,9 @@ class GroupByWindowTest(test.TestCase):
     components = np.random.randint(100, size=(200,)).astype(np.int64)
     iterator = dataset_ops.Iterator.from_dataset(
         dataset_ops.Dataset.from_tensor_slices(components).map(lambda x: x * x)
-        .group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4), 4))
+        .apply(
+            dataset_ops.group_by_window,
+            args=(lambda x: x % 2, lambda _, xs: xs.batch(4), 4)))
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -61,8 +63,9 @@ class GroupByWindowTest(test.TestCase):
     components = np.array(
         [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0], dtype=np.int64)
     iterator = dataset_ops.Iterator.from_dataset(
-        dataset_ops.Dataset.from_tensor_slices(components).repeat(-1)
-        .group_by_window(lambda x: x % 3, lambda _, xs: xs.batch(4), 4))
+        dataset_ops.Dataset.from_tensor_slices(components).repeat(-1).apply(
+            dataset_ops.group_by_window,
+            args=(lambda x: x % 3, lambda _, xs: xs.batch(4), 4)))
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -81,8 +84,9 @@ class GroupByWindowTest(test.TestCase):
   def testSmallGroups(self):
     components = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64)
     iterator = dataset_ops.Iterator.from_dataset(
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4), 4))
+        dataset_ops.Dataset.from_tensor_slices(components).apply(
+            dataset_ops.group_by_window,
+            args=(lambda x: x % 2, lambda _, xs: xs.batch(4), 4)))
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -108,8 +112,9 @@ class GroupByWindowTest(test.TestCase):
 
     iterator = dataset_ops.Iterator.from_dataset(
         dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: (x, ops.convert_to_tensor([x * x])))
-        .group_by_window(lambda x, _: x % 2, reduce_func, 32))
+        .map(lambda x: (x, ops.convert_to_tensor([x * x]))).apply(
+            dataset_ops.group_by_window,
+            args=(lambda x, _: x % 2, reduce_func, 32)))
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -124,17 +129,20 @@ class GroupByWindowTest(test.TestCase):
     def reduce_func(key, window):
       # Apply two different kinds of padding to the input: tight
       # padding, and quantized (to a multiple of 10) padding.
-      return dataset_ops.Dataset.zip((window.padded_batch(
-          4,
-          padded_shapes=tensor_shape.TensorShape([None])), window.padded_batch(
+      return dataset_ops.Dataset.zip((
+          window.padded_batch(
+              4, padded_shapes=tensor_shape.TensorShape([None])),
+          window.padded_batch(
               4, padded_shapes=ops.convert_to_tensor([(key + 1) * 10])),))
 
     iterator = dataset_ops.Iterator.from_dataset(
         dataset_ops.Dataset.from_tensor_slices(components)
         .map(lambda x: array_ops.fill([math_ops.cast(x, dtypes.int32)], x))
-        .group_by_window(
-            lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64),
-            reduce_func, 4))
+        .apply(
+            dataset_ops.group_by_window,
+            args=
+            (lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64),
+             reduce_func, 4)))
     init_op = iterator.initializer
     get_next = iterator.get_next()
 
@@ -151,10 +159,9 @@ class GroupByWindowTest(test.TestCase):
       self.assertEqual(len(components), sum(counts))
 
 
-# NOTE(mrry): These tests are based on the tests in
-# bucket_ops_test.py. Currently, different batch sizes for each key
-# are not supported, although this would be possible to add to
-# `Dataset.group_by_window()`.
+# NOTE(mrry): These tests are based on the tests in bucket_ops_test.py.
+# Currently, they use a constant batch size, though should be made to use a
+# different batch size per key.
 class BucketTest(test.TestCase):
 
   def _dynamicPad(self, bucket, window, window_size):
@@ -168,6 +175,7 @@ class BucketTest(test.TestCase):
                  tensor_shape.TensorShape([3])))))
 
   def testSingleBucket(self):
+
     def _map_fn(v):
       return (v, array_ops.fill([v], v),
               array_ops.fill([3], string_ops.as_string(v)))
@@ -175,9 +183,10 @@ class BucketTest(test.TestCase):
     input_dataset = (
         dataset_ops.Dataset.from_tensor_slices(math_ops.range(32)).map(_map_fn))
 
-    bucketed_dataset = input_dataset.group_by_window(
-        lambda x, y, z: 0, lambda k, bucket: self._dynamicPad(k, bucket, 32),
-        32)
+    bucketed_dataset = input_dataset.apply(
+        dataset_ops.group_by_window,
+        args=(lambda x, y, z: 0,
+              lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
 
     iterator = dataset_ops.Iterator.from_dataset(bucketed_dataset)
     init_op = iterator.initializer
@@ -201,6 +210,7 @@ class BucketTest(test.TestCase):
       self.assertAllEqual(expected_vec3_str, bucketed_values[2])
 
   def testEvenOddBuckets(self):
+
     def _map_fn(v):
       return (v, array_ops.fill([v], v),
               array_ops.fill([3], string_ops.as_string(v)))
@@ -208,9 +218,10 @@ class BucketTest(test.TestCase):
     input_dataset = (
         dataset_ops.Dataset.from_tensor_slices(math_ops.range(64)).map(_map_fn))
 
-    bucketed_dataset = input_dataset.group_by_window(
-        lambda x, y, z: math_ops.cast(x % 2, dtypes.int64),
-        lambda k, bucket: self._dynamicPad(k, bucket, 32), 32)
+    bucketed_dataset = input_dataset.apply(
+        dataset_ops.group_by_window,
+        args=(lambda x, y, z: math_ops.cast(x % 2, dtypes.int64),
+              lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
 
     iterator = dataset_ops.Iterator.from_dataset(bucketed_dataset)
     init_op = iterator.initializer
@@ -256,25 +267,31 @@ class BucketTest(test.TestCase):
       self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2])
 
   def testEvenOddBucketsFilterOutAllOdd(self):
+
     def _map_fn(v):
-      return {"x": v,
-              "y": array_ops.fill([v], v),
-              "z": array_ops.fill([3], string_ops.as_string(v))}
+      return {
+          "x": v,
+          "y": array_ops.fill([v], v),
+          "z": array_ops.fill([3], string_ops.as_string(v))
+      }
 
     def _dynamic_pad_fn(bucket, window, _):
       return dataset_ops.Dataset.zip(
           (dataset_ops.Dataset.from_tensors(bucket), window.padded_batch(
-              32, {"x": tensor_shape.TensorShape([]),
-                   "y": tensor_shape.TensorShape([None]),
-                   "z": tensor_shape.TensorShape([3])})))
+              32, {
+                  "x": tensor_shape.TensorShape([]),
+                  "y": tensor_shape.TensorShape([None]),
+                  "z": tensor_shape.TensorShape([3])
+              })))
 
     input_dataset = (
         dataset_ops.Dataset.from_tensor_slices(math_ops.range(128)).map(_map_fn)
         .filter(lambda d: math_ops.equal(d["x"] % 2, 0)))
 
-    bucketed_dataset = input_dataset.group_by_window(
-        lambda d: math_ops.cast(d["x"] % 2, dtypes.int64),
-        lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32)
+    bucketed_dataset = input_dataset.apply(
+        dataset_ops.group_by_window,
+        args=(lambda d: math_ops.cast(d["x"] % 2, dtypes.int64),
+              lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32))
 
     iterator = dataset_ops.Iterator.from_dataset(bucketed_dataset)
     init_op = iterator.initializer
@@ -295,6 +312,40 @@ class BucketTest(test.TestCase):
       self.assertAllEqual(
           np.arange(64, 128, 2, dtype=np.int64), bucketed_values_even1["x"])
 
+  def testDynamicWindowSize(self):
+    components = np.arange(100).astype(np.int64)
+
+    # Key fn: even/odd
+    # Reduce fn: batches of 5
+    # Window size fn: even=5, odd=10
+
+    def window_size_func(key):
+      window_sizes = constant_op.constant([5, 10], dtype=dtypes.int64)
+      return window_sizes[key]
+
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
+        dataset_ops.group_by_window,
+        args=(lambda x: x % 2, lambda _, xs: xs.batch(20), None,
+              window_size_func))
+    iterator = dataset_ops.Iterator.from_dataset(dataset)
+    init_op = iterator.initializer
+    get_next = iterator.get_next()
+
+    with self.test_session() as sess:
+      sess.run(init_op)
+      with self.assertRaises(errors.OutOfRangeError):
+        batches = 0
+        while True:
+          result = sess.run(get_next)
+          is_even = all(x % 2 == 0 for x in result)
+          is_odd = all(x % 2 == 1 for x in result)
+          self.assertTrue(is_even or is_odd)
+          expected_batch_size = 5 if is_even else 10
+          self.assertEqual(expected_batch_size, result.shape[0])
+          batches += 1
+
+      self.assertEqual(batches, 15)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py
index 46af2b1949..abf7bcb384 100644
--- a/tensorflow/contrib/data/python/ops/dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/dataset_ops.py
@@ -1199,28 +1199,9 @@ class Dataset(object):
     return DenseToSparseBatchDataset(self, batch_size, row_shape)
 
   def group_by_window(self, key_func, reduce_func, window_size):
-    """Performs a windowed "group-by" operation on this dataset.
-
-    This method maps each consecutive element in this dataset to a key
-    using `key_func` and groups the elements by key. It then applies
-    `reduce_func` to at most `window_size` elements matching the same
-    key. All execpt the final window for each key will contain
-    `window_size` elements; the final window may be smaller.
-
-    Args:
-      key_func: A function mapping a nested structure of tensors
-        (having shapes and types defined by `self.output_shapes` and
-        `self.output_types`) to a scalar `tf.int64` tensor.
-      reduce_func: A function mapping a key and a dataset of up to `batch_size`
-        consecutive elements matching that key to another dataset.
-      window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
-        consecutive elements matching the same key to combine in a single
-        batch, which will be passed to `reduce_func`.
-
-    Returns:
-      A `Dataset`.
-    """
-    return GroupByWindowDataset(self, key_func, reduce_func, window_size)
+    """See group_by_window()."""
+    return self.apply(
+        group_by_window, args=(key_func, reduce_func, window_size))
 
   def map(self,
           map_func,
@@ -1370,6 +1351,43 @@ class Dataset(object):
     """
     return FilterDataset(self, predicate)
 
+  def apply(self, fn, args=(), kwargs={}):  # pylint: disable=dangerous-default-value
+    """Apply a function to this dataset.
+
+    `apply` enables chaining of custom `Dataset` transformations.
+
+    For example:
+
+    ```
+    dataset.map(
+        lambda x: x**2
+    ).apply(
+        group_by_window, args=(key_func, reduce_func, window_size)
+    ).map(
+        lambda x: x**3
+    )
+    ```
+
+    Args:
+      fn: A function that takes a `Dataset`, `args`, and `kwargs`, and
+        returns a `Dataset`.
+      args: A `tuple` or `list` of arguments to be passed to `fn`.
+      kwargs: A `dict` of keyword arguments to be passed to `fn`.
+
+    Returns:
+      The `Dataset` returned by `fn`.
+    """
+    if not (isinstance(args, tuple) or isinstance(args, list)):
+      raise TypeError("args must be a tuple or list.")
+    if not isinstance(kwargs, dict):
+      raise TypeError("kwargs must be a dict.")
+
+    dataset = fn(self, *args, **kwargs)
+
+    if not isinstance(dataset, Dataset):
+      raise TypeError("fn must return a Dataset.")
+    return dataset
+
 
 class TensorDataset(Dataset):
   """A `Dataset` with a single element, viz. a nested structure of tensors."""
@@ -1927,71 +1945,6 @@ class _ResourceDataset(Dataset):
     return self._output_types
 
 
-class GroupByWindowDataset(Dataset):
-  """A `Dataset` that groups its input and performs a windowed reduction."""
-
-  def __init__(self, input_dataset, key_func, reduce_func, window_size):
-    """See `Dataset.group_by_window()` for details."""
-    super(GroupByWindowDataset, self).__init__()
-    self._input_dataset = input_dataset
-    self._window_size = window_size
-
-    @function.Defun(*nest.flatten(input_dataset.output_types))
-    def tf_key_func(*args):
-      """A wrapper for Defun that facilitates shape inference."""
-      # Pass in shape information from the input_dataset.
-      for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)):
-        arg.set_shape(shape)
-      nested_args = nest.pack_sequence_as(input_dataset.output_types, args)
-      if _should_unpack_args(nested_args):
-        ret = key_func(*nested_args)
-      else:
-        ret = key_func(nested_args)
-      ret = ops.convert_to_tensor(ret, dtype=dtypes.int64)
-      if ret.dtype != dtypes.int64:
-        raise ValueError("`key_func` must return a single tf.int64 tensor.")
-      return ret
-
-    self._key_func = tf_key_func
-    self._key_func.add_to_graph(ops.get_default_graph())
-
-    @function.Defun(dtypes.int64, dtypes.resource)
-    def tf_reduce_func(key, window_dataset_resource):
-      """A wrapper for Defun that facilitates shape inference."""
-      key.set_shape([])
-      window_dataset = _ResourceDataset(window_dataset_resource,
-                                        input_dataset.output_types,
-                                        input_dataset.output_shapes)
-      output_dataset = reduce_func(key, window_dataset)
-      if not isinstance(output_dataset, Dataset):
-        raise TypeError("`reduce_func` must return a `Dataset` object.")
-      self._output_types = output_dataset.output_types
-      self._output_shapes = output_dataset.output_shapes
-      return output_dataset.make_dataset_resource()
-
-    self._reduce_func = tf_reduce_func
-    self._reduce_func.add_to_graph(ops.get_default_graph())
-
-  def make_dataset_resource(self):
-    return gen_dataset_ops.group_by_window_dataset(
-        self._input_dataset.make_dataset_resource(),
-        self._key_func.captured_inputs,
-        self._reduce_func.captured_inputs,
-        self._window_size,
-        key_func=self._key_func,
-        reduce_func=self._reduce_func,
-        output_types=nest.flatten(self.output_types),
-        output_shapes=nest.flatten(self.output_shapes))
-
-  @property
-  def output_shapes(self):
-    return self._output_shapes
-
-  @property
-  def output_types(self):
-    return self._output_types
-
-
 class MapDataset(Dataset):
   """A `Dataset` that maps a function over elements in its input."""
 
@@ -2660,3 +2613,149 @@ def _get_file_names(file_pattern, randomize_input):
   if not randomize_input:
     file_names = sorted(file_names)
   return file_names
+
+
+class GroupByWindowDataset(Dataset):
+  """A `Dataset` that groups its input and performs a windowed reduction."""
+
+  def __init__(self, input_dataset, key_func, reduce_func, window_size_func):
+    """See `group_by_window()` for details."""
+    super(GroupByWindowDataset, self).__init__()
+
+    self._input_dataset = input_dataset
+
+    self._make_key_func(key_func, input_dataset)
+    self._make_reduce_func(reduce_func, input_dataset)
+    self._make_window_size_func(window_size_func)
+
+  def _make_window_size_func(self, window_size_func):
+    """Make wrapping Defun for window_size_func."""
+
+    @function.Defun(dtypes.int64)
+    def tf_window_size_func(key):
+      key.set_shape([])
+      window_size = ops.convert_to_tensor(
+          window_size_func(key), dtype=dtypes.int64)
+      if window_size.dtype != dtypes.int64:
+        raise ValueError(
+            "`window_size_func` must return a single tf.int64 tensor.")
+      return window_size
+
+    self._window_size_func = tf_window_size_func
+    self._window_size_func.add_to_graph(ops.get_default_graph())
+
+  def _make_key_func(self, key_func, input_dataset):
+    """Make wrapping Defun for key_func."""
+
+    @function.Defun(*nest.flatten(input_dataset.output_types))
+    def tf_key_func(*args):
+      """A wrapper for Defun that facilitates shape inference."""
+      # Pass in shape information from the input_dataset.
+      for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)):
+        arg.set_shape(shape)
+      nested_args = nest.pack_sequence_as(input_dataset.output_types, args)
+      if _should_unpack_args(nested_args):
+        ret = key_func(*nested_args)
+      else:
+        ret = key_func(nested_args)
+      ret = ops.convert_to_tensor(ret, dtype=dtypes.int64)
+      if ret.dtype != dtypes.int64:
+        raise ValueError("`key_func` must return a single tf.int64 tensor.")
+      return ret
+
+    self._key_func = tf_key_func
+    self._key_func.add_to_graph(ops.get_default_graph())
+
+  def _make_reduce_func(self, reduce_func, input_dataset):
+    """Make wrapping Defun for reduce_func."""
+
+    @function.Defun(dtypes.int64, dtypes.resource)
+    def tf_reduce_func(key, window_dataset_resource):
+      """A wrapper for Defun that facilitates shape inference."""
+      key.set_shape([])
+      window_dataset = _ResourceDataset(window_dataset_resource,
+                                        input_dataset.output_types,
+                                        input_dataset.output_shapes)
+      output_dataset = reduce_func(key, window_dataset)
+      if not isinstance(output_dataset, Dataset):
+        raise TypeError("`reduce_func` must return a `Dataset` object.")
+      self._output_types = output_dataset.output_types
+      self._output_shapes = output_dataset.output_shapes
+      return output_dataset.make_dataset_resource()
+
+    self._reduce_func = tf_reduce_func
+    self._reduce_func.add_to_graph(ops.get_default_graph())
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+  def make_dataset_resource(self):
+    return gen_dataset_ops.group_by_window_dataset(
+        self._input_dataset.make_dataset_resource(),
+        self._key_func.captured_inputs,
+        self._reduce_func.captured_inputs,
+        self._window_size_func.captured_inputs,
+        key_func=self._key_func,
+        reduce_func=self._reduce_func,
+        window_size_func=self._window_size_func,
+        output_types=nest.flatten(self.output_types),
+        output_shapes=nest.flatten(self.output_shapes))
+
+
+def group_by_window(dataset,
+                    key_func,
+                    reduce_func,
+                    window_size=None,
+                    window_size_func=None):
+  """Performs a windowed "group-by" operation on this dataset.
+
+  This method maps each consecutive element in this dataset to a key
+  using `key_func` and groups the elements by key. It then applies
+  `reduce_func` to at most `window_size_func(key)` elements matching the same
+  key. All execpt the final window for each key will contain
+  `window_size_func(key)` elements; the final window may be smaller.
+
+  You may provide either a constant `window_size` or a window size determined by
+  the key through `window_size_func`.
+
+  Args:
+    dataset: A `Dataset`.
+    key_func: A function mapping a nested structure of tensors
+      (having shapes and types defined by `self.output_shapes` and
+      `self.output_types`) to a scalar `tf.int64` tensor.
+    reduce_func: A function mapping a key and a dataset of up to `batch_size`
+      consecutive elements matching that key to another dataset.
+    window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
+      consecutive elements matching the same key to combine in a single
+      batch, which will be passed to `reduce_func`. Mutually exclusive with
+      `window_size_func`.
+    window_size_func: A function mapping a key to a `tf.int64` scalar
+      `tf.Tensor`, representing the number of consecutive elements matching
+      the same key to combine in a single batch, which will be passed to
+      `reduce_func`. Mutually exclusive with `window_size`.
+
+  Returns:
+    A `Dataset`.
+
+  Raises:
+    ValueError: if neither or both of {`window_size`, `window_size_func`} are
+      passed.
+  """
+  if (window_size is not None and window_size_func or
+      not (window_size is not None or window_size_func)):
+    raise ValueError("Must pass either window_size or window_size_func.")
+
+  if window_size is not None:
+
+    def constant_window_func(unused_key):
+      return ops.convert_to_tensor(window_size, dtype=dtypes.int64)
+
+    window_size_func = constant_window_func
+
+  assert window_size_func is not None
+  return GroupByWindowDataset(dataset, key_func, reduce_func, window_size_func)
diff --git a/tensorflow/core/kernels/group_by_window_dataset_op.cc b/tensorflow/core/kernels/group_by_window_dataset_op.cc
index a53e9456ad..a4f9608b1f 100644
--- a/tensorflow/core/kernels/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/group_by_window_dataset_op.cc
@@ -36,20 +36,14 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
         graph_def_version_(ctx->graph_def_version()) {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("key_func", &key_func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("reduce_func", &reduce_func_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("window_size_func", &window_size_func_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
   }
 
   void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
                    DatasetBase** output) override {
-    int64 window_size = 0;
-    OP_REQUIRES_OK(
-        ctx, ParseScalarArgument(ctx, "window_size", &window_size));
-    OP_REQUIRES(
-        ctx, window_size > 0,
-        errors::InvalidArgument("Window size must be greater than zero."));
-
-    // Get captured inputs for the key and reduce functions.
+    // Get captured inputs for the key, reduce, and window_size functions.
     OpInputList key_func_other_argument_inputs;
     OP_REQUIRES_OK(ctx, ctx->input_list("key_func_other_arguments",
                                         &key_func_other_argument_inputs));
@@ -67,6 +61,16 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
     for (const Tensor& t : reduce_func_other_argument_inputs) {
       reduce_func_other_arguments.push_back(t);
     }
+    OpInputList window_size_func_other_argument_inputs;
+    OP_REQUIRES_OK(ctx,
+                   ctx->input_list("window_size_func_other_arguments",
+                                   &window_size_func_other_argument_inputs));
+    std::vector window_size_func_other_arguments;
+    window_size_func_other_arguments.reserve(
+        window_size_func_other_argument_inputs.size());
+    for (const Tensor& t : window_size_func_other_argument_inputs) {
+      window_size_func_other_arguments.push_back(t);
+    }
     // TODO(mrry): Refactor CapturedFunction to share the runtime
     // state between multiple functions?
     std::unique_ptr captured_key_func;
@@ -79,24 +83,30 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
         ctx, CapturedFunction::Create(ctx, reduce_func_, graph_def_version_,
                                       std::move(reduce_func_other_arguments),
                                       &captured_reduce_func));
-
-    *output = new Dataset(input, window_size, std::move(captured_key_func),
-                          std::move(captured_reduce_func), output_types_,
-                          output_shapes_);
+    std::unique_ptr captured_window_size_func;
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(
+                            ctx, window_size_func_, graph_def_version_,
+                            std::move(window_size_func_other_arguments),
+                            &captured_window_size_func));
+
+    *output = new Dataset(
+        input, std::move(captured_key_func), std::move(captured_reduce_func),
+        std::move(captured_window_size_func), output_types_, output_shapes_);
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
-    Dataset(const DatasetBase* input, int64 window_size,
+    Dataset(const DatasetBase* input,
             std::unique_ptr captured_key_func,
             std::unique_ptr captured_reduce_func,
+            std::unique_ptr captured_window_size_func,
             const DataTypeVector& output_types,
             const std::vector& output_shapes)
         : input_(input),
-          window_size_(window_size),
           captured_key_func_(std::move(captured_key_func)),
           captured_reduce_func_(std::move(captured_reduce_func)),
+          captured_window_size_func_(std::move(captured_window_size_func)),
           output_types_(output_types),
           output_shapes_(output_shapes) {
       input_->Ref();
@@ -182,10 +192,44 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
               }
               const int64 key = key_func_output[0].scalar()();
 
+              if (window_sizes_.find(key) == window_sizes_.end()) {
+                // Run window_size function
+                FunctionLibraryRuntime::Options opts2;
+                opts2.step_id = CapturedFunction::generate_step_id();
+                opts2.runner = ctx->runner();
+                ScopedStepContainer step_container2(
+                    opts2.step_id, [this, ctx](const string& name) {
+                      dataset()
+                          ->captured_window_size_func_->resource_manager()
+                          ->Cleanup(name)
+                          .IgnoreError();
+                    });
+                opts2.step_container = &step_container2;
+
+                // Run the window size function on the key to identify its
+                // window size.
+                std::vector window_size_func_output;
+                TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Run(
+                    opts2, key_func_output, &window_size_func_output));
+
+                if (window_size_func_output.size() != 1 ||
+                    window_size_func_output[0].dtype() != DT_INT64 ||
+                    window_size_func_output[0].NumElements() != 1) {
+                  // TODO(mrry): Support non-int64 window sizes.
+                  return errors::InvalidArgument(
+                      "`window_size_func` must return a scalar int64.");
+                }
+                const int64 window_size =
+                    window_size_func_output[0].scalar()();
+                window_sizes_[key] = window_size;
+              }
+
+              const int64 window_size = window_sizes_[key];
+
               std::vector>& group = groups_[key];
               group.push_back(std::move(next_input_element));
 
-              if (group.size() == dataset()->window_size_) {
+              if (group.size() == window_size) {
                 TF_RETURN_IF_ERROR(StartFlushingGroup(ctx, key));
                 break;
               }
@@ -297,6 +341,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
       bool end_of_input_ GUARDED_BY(mu_) = false;
       std::map>> groups_ GUARDED_BY(mu_);
       std::unique_ptr current_group_iterator_ GUARDED_BY(mu_);
+      std::map window_sizes_ GUARDED_BY(mu_);
     };
 
     // A resource name for the temporary window dataset that is
@@ -304,9 +349,9 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
     static constexpr const char* kWindowResourceName = "__window_dataset";
 
     const DatasetBase* const input_;
-    const int64 window_size_;
     const std::unique_ptr captured_key_func_;
     const std::unique_ptr captured_reduce_func_;
+    const std::unique_ptr captured_window_size_func_;
     const DataTypeVector output_types_;
     const std::vector output_shapes_;
   };
@@ -316,6 +361,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
   std::vector output_shapes_;
   const NameAttrList* key_func_;
   const NameAttrList* reduce_func_;
+  const NameAttrList* window_size_func_;
 };
 
 REGISTER_KERNEL_BUILDER(Name("GroupByWindowDataset").Device(DEVICE_CPU),
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index ad290d123e..22d4a0056f 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -10467,8 +10467,8 @@ op {
     type_list_attr: "Treduce_func_other_arguments"
   }
   input_arg {
-    name: "window_size"
-    type: DT_INT64
+    name: "window_size_func_other_arguments"
+    type_list_attr: "Twindow_size_func_other_arguments"
   }
   output_arg {
     name: "handle"
@@ -10482,6 +10482,10 @@ op {
     name: "reduce_func"
     type: "func"
   }
+  attr {
+    name: "window_size_func"
+    type: "func"
+  }
   attr {
     name: "Tkey_func_other_arguments"
     type: "list(type)"
@@ -10492,6 +10496,11 @@ op {
     type: "list(type)"
     has_minimum: true
   }
+  attr {
+    name: "Twindow_size_func_other_arguments"
+    type: "list(type)"
+    has_minimum: true
+  }
   attr {
     name: "output_types"
     type: "list(type)"
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index f6bd5768d7..37d9a737e2 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -237,12 +237,15 @@ REGISTER_OP("GroupByWindowDataset")
     .Input("input_dataset: resource")
     .Input("key_func_other_arguments: Tkey_func_other_arguments")
     .Input("reduce_func_other_arguments: Treduce_func_other_arguments")
-    .Input("window_size: int64")
+    .Input(
+        "window_size_func_other_arguments: Twindow_size_func_other_arguments")
     .Output("handle: resource")
     .Attr("key_func: func")
     .Attr("reduce_func: func")
+    .Attr("window_size_func: func")
     .Attr("Tkey_func_other_arguments: list(type) >= 0")
     .Attr("Treduce_func_other_arguments: list(type) >= 0")
+    .Attr("Twindow_size_func_other_arguments: list(type) >= 0")
     .Attr("output_types: list(type) >= 1")
     .Attr("output_shapes: list(shape) >= 1")
     .SetShapeFn(shape_inference::ScalarShape)
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 13356e1d8a..63b7532b33 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -9611,8 +9611,8 @@ op {
     type_list_attr: "Treduce_func_other_arguments"
   }
   input_arg {
-    name: "window_size"
-    type: DT_INT64
+    name: "window_size_func_other_arguments"
+    type_list_attr: "Twindow_size_func_other_arguments"
   }
   output_arg {
     name: "handle"
@@ -9627,6 +9627,10 @@ op {
     name: "reduce_func"
     type: "func"
   }
+  attr {
+    name: "window_size_func"
+    type: "func"
+  }
   attr {
     name: "Tkey_func_other_arguments"
     type: "list(type)"
@@ -9637,6 +9641,11 @@ op {
     type: "list(type)"
     has_minimum: true
   }
+  attr {
+    name: "Twindow_size_func_other_arguments"
+    type: "list(type)"
+    has_minimum: true
+  }
   attr {
     name: "output_types"
     type: "list(type)"
-- 
GitLab


From 66ed3d877ff6f2639339e7661e7fa8b2664190a5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 16:41:50 -0700
Subject: [PATCH 129/294] Move most of checkpoint_ops and tests from contrib to
 core (private in core with public contrib alias).  Modified unit tests to use
 ephemeral checkpoints / vocabularies instead of checked-in testdata.

PiperOrigin-RevId: 167068777
---
 .../framework/python/ops/checkpoint_ops.py    | 436 +----------------
 .../python/ops/checkpoint_ops_test.py         | 245 ----------
 tensorflow/python/BUILD                       |  23 +
 tensorflow/python/training/checkpoint_ops.py  | 453 ++++++++++++++++++
 .../python/training/checkpoint_ops_test.py    | 305 ++++++++++++
 5 files changed, 787 insertions(+), 675 deletions(-)
 create mode 100644 tensorflow/python/training/checkpoint_ops.py
 create mode 100644 tensorflow/python/training/checkpoint_ops_test.py

diff --git a/tensorflow/contrib/framework/python/ops/checkpoint_ops.py b/tensorflow/contrib/framework/python/ops/checkpoint_ops.py
index 848e26ab96..26146790b6 100644
--- a/tensorflow/contrib/framework/python/ops/checkpoint_ops.py
+++ b/tensorflow/contrib/framework/python/ops/checkpoint_ops.py
@@ -17,440 +17,16 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
-
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_checkpoint_ops
 from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-
-ops.NotDifferentiable("GenerateVocabRemapping")
-ops.NotDifferentiable("LoadAndRemapMatrix")
-
-
-def _load_and_remap_matrix(ckpt_path,
-                           old_tensor_name,
-                           new_row_vocab_offset,
-                           num_rows_to_load,
-                           new_col_vocab_size,
-                           initializer,
-                           old_row_vocab_file=None,
-                           new_row_vocab_file=None,
-                           old_col_vocab_file=None,
-                           new_col_vocab_file=None,
-                           num_row_oov_buckets=0,
-                           num_col_oov_buckets=0,
-                           max_rows_in_memory=-1):
-  """Loads a 2-D (matrix) `Tensor` from checkpoint.
-
-  Generates 1D-remappings for rows and columns using the
-  `GenerateVocabRemapping` op, and initializes any anticipated values with the
-  provided initializer. Then, uses the `LoadAndRemapMatrix` op to create a
-  matrix that loads existing values from the checkpoint, while filling out
-  "missing" values with the newly initialized values. See
-  contrib/framework/ops/checkpoint_ops.cc for more information on the wrapped
-  functionality (LoadAndRemapMatrix). This wrapper can be used to perform only
-  row remapping or only col remapping. If only row remapping is desired,
-  {new,old}_col_vocab_file should be `None`, and vice versa for column
-  remapping.
-
-  NOTE: This only supports div-partitioning the vocabulary on the 1st dimension
-  (row axis) via `new_row_vocab_offset`.
-
-  Args:
-    ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`)
-      from which the old matrix `Tensor` will be loaded.
-    old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
-    new_row_vocab_offset: A 0-indexed integer representing what line to
-      start reading at in the new row vocabulary. Used for partitioned
-      variables.
-    num_rows_to_load: Number of rows to load for the new vocabulary (note: to
-      support variable partitioning and partial loading, this does not need to
-      be the same as the number of entries in `new_row_vocab_file`).
-    new_col_vocab_size: Number of columns to load - should be the same as the
-      number of entries in `new_col_vocab_file`, since we don't support
-      partitioning along the column axis.
-    initializer: Callable initializer function that accepts a 1-D tensor as the
-      arg to specify the shape of the returned tensor. Used to initialize
-      missing values.
-    old_row_vocab_file: A scalar `Tensor` of type `string` containing the
-      path to the old row vocabulary file. Can be None, which represents no
-      remapping on the row axis.
-    new_row_vocab_file: A scalar `Tensor` of type `string` containing the path
-      to the new row vocabulary file. Can be None, which represents no remapping
-      on the row axis - in which case, `new_row_vocab_offset` and
-      `num_rows_to_load` work under the assumption that the new row vocab is the
-      same as the old row vocab.
-    old_col_vocab_file: A scalar `Tensor` of type `string` containing the
-      path to the old column vocabulary file. Can be None, which represents no
-      remapping on the column axis.
-    new_col_vocab_file: A scalar `Tensor` of type `string` containing the path
-      to the new column vocabulary file. Can be None, which represents no
-      remapping on the column axis - in which case, `new_col_vocab_size` works
-      under the assumption that the new col vocab is the same as the old col
-      vocab.
-    num_row_oov_buckets: `int` specifying the number of out-of-vocabulary rows
-      to append. Must be >= 0.
-    num_col_oov_buckets: `int` specifying the number of out-of-vocabulary
-      columns to append. Must be >= 0.
-    max_rows_in_memory: `int` specifying the maximum number of rows to load from
-      the checkpoint at once. If less than or equal to 0, the entire matrix will
-      be loaded into memory. Setting this arg trades increased disk reads for
-      lower memory usage.
-
-  Returns:
-    A Tensor of shape `[num_rows_to_load + num_row_oov_buckets,
-    new_col_vocab_size + num_col_oov_buckets]`, with values loaded from the
-    specified tensor in the checkpoint, and any missing or OOV values
-    initialized with the given `initializer`.
-
-  Raises:
-    ValueError: If `num_row_oov_buckets` or `num_col_oov_buckets` < 0.
-    ValueError: If either `old_row_vocab_file` or `new_row_vocab_file` is
-      provided, while the other is not. Same for `old_col_vocab_file` and
-      `new_col_vocab_file`.
-    ValueError: If neither row vocabs or col vocabs are provided.
-  """
-  if num_row_oov_buckets < 0:
-    raise ValueError("num_row_oov_buckets must be >= 0, but received %d" %
-                     num_row_oov_buckets)
-  if num_col_oov_buckets < 0:
-    raise ValueError("num_col_oov_buckets must be >= 0, but received %d" %
-                     num_col_oov_buckets)
-
-  if bool(old_row_vocab_file) != bool(new_row_vocab_file):
-    raise ValueError(
-        "old_row_vocab_file and new_row_vocab_file must both be specified or "
-        "left unspecified. old_row_vocab_file='{}', new_row_vocab_file='{}'".
-        format(old_row_vocab_file, new_row_vocab_file))
-  if bool(old_col_vocab_file) != bool(new_col_vocab_file):
-    raise ValueError(
-        "old_col_vocab_file and new_col_vocab_file must both be specified or "
-        "left unspecified. old_col_vocab_file='{}', new_col_vocab_file='{}'".
-        format(old_col_vocab_file, new_col_vocab_file))
-
-  remap_rows = new_row_vocab_file and old_row_vocab_file
-  remap_cols = new_col_vocab_file and old_col_vocab_file
-  if not (remap_rows or remap_cols):
-    raise ValueError(
-        "Must provide either row or column vocab files. If no remapping is "
-        "necessary, consider using `tf.contrib.framework.init_from_checkpoint` "
-        "instead.")
-
-  num_rows_present = num_rows_to_load
-  if remap_rows:
-    row_remapping, num_rows_present = (
-        gen_checkpoint_ops._generate_vocab_remapping(  # pylint: disable=protected-access
-            new_vocab_file=new_row_vocab_file,
-            old_vocab_file=old_row_vocab_file,
-            new_vocab_offset=new_row_vocab_offset,
-            num_new_vocab=num_rows_to_load))
-  else:
-    # Even when the rows are not being reordered, we still need to generate a
-    # remapping to account for initializing partitioned Variables (when
-    # new_row_vocab_offset is non-zero).
-    row_remapping = math_ops.range(
-        new_row_vocab_offset,
-        new_row_vocab_offset + num_rows_to_load,
-        dtype=dtypes.int64)
+from tensorflow.python.training import checkpoint_ops
 
-  col_remapping = []
-  num_cols_present = new_col_vocab_size
-  if remap_cols:
-    col_remapping, num_cols_present = (
-        gen_checkpoint_ops._generate_vocab_remapping(  # pylint: disable=protected-access
-            new_vocab_file=new_col_vocab_file,
-            old_vocab_file=old_col_vocab_file,
-            new_vocab_offset=0,  # Offset is unused for cols (no partitioning).
-            num_new_vocab=new_col_vocab_size))
 
-  init_vals = initializer([
-      num_rows_to_load * new_col_vocab_size -
-      num_rows_present * num_cols_present, 1
-  ])
-  return_tensor = gen_checkpoint_ops._load_and_remap_matrix(  # pylint: disable=protected-access
-      ckpt_path=ckpt_path,
-      old_tensor_name=old_tensor_name,
-      row_remapping=row_remapping,
-      col_remapping=col_remapping,
-      initializing_values=init_vals,
-      num_rows=num_rows_to_load,
-      num_cols=new_col_vocab_size,
-      max_rows_in_memory=max_rows_in_memory)
-
-  # Add OOV row(s) and column(s).
-  if num_row_oov_buckets > 0:
-    init_row_oov_val = initializer([num_row_oov_buckets, new_col_vocab_size])
-    init_row_oov_val = ops.convert_to_tensor(init_row_oov_val)
-    return_tensor = array_ops.concat([return_tensor, init_row_oov_val], 0)
-  if num_col_oov_buckets > 0:
-    # We need to add any row OOV to the new column shape.
-    init_col_oov_val = initializer(
-        [num_rows_to_load + num_row_oov_buckets, num_col_oov_buckets])
-    init_col_oov_val = ops.convert_to_tensor(init_col_oov_val)
-    return_tensor = array_ops.concat([return_tensor, init_col_oov_val], 1)
-
-  return return_tensor
-
-
-def load_and_remap_matrix_initializer(ckpt_path,
-                                      old_tensor_name,
-                                      new_row_vocab_size,
-                                      new_col_vocab_size,
-                                      old_row_vocab_file=None,
-                                      new_row_vocab_file=None,
-                                      old_col_vocab_file=None,
-                                      new_col_vocab_file=None,
-                                      num_row_oov_buckets=0,
-                                      num_col_oov_buckets=0,
-                                      initializer=None,
-                                      max_rows_in_memory=-1):
-  r"""Returns a var initializer for loading and remapping a 2-D (matrix) tensor.
-
-  The returned initializer loads a 2-D (matrix) `Tensor` with name
-  `old_tensor_name` from the checkpoint at `ckpt_path`. It will reorder the
-  rows/columns according to the specified vocab files and append additional
-  out-of-vocabulary rows/columns according to the number of OOV buckets.
-
-  The format of the file at the `{old,new}_{row,col}_vocab_file` path should be
-  a text file, with each line containing a single entity within the vocabulary.
-  Let the function `line_of(f, "x")` return the 0-indexed line number of the
-  entity "x" in file f, and the function `entity_at(f, i)` return the entity at
-  line i of file f. Then, row i of the new output matrix will be taken from row
-  `line_of(old_row_vocab_file, entity_at(new_row_vocab_file, i))` of the old
-  matrix. If any entity in `new_row_vocab_file` is not found in
-  `old_row_vocab_file`, that row is considered a "missing" row, and its values
-  will be initialized using the `initializer` arg. The same logic also applies
-  for the columns.
-
-  For example, assuming that:
-
-  * `old_row_vocab_file` contains "mercury\nvenus\nmars"
-  * `new_row_vocab_file` contains "venus\njupiter\nmercury"
-  * `old_col_vocab_file` contains "good\nbetter\nbest"
-  * `new_col_vocab_file` contains "good\nbest\nfantastic"
-  * `initializer` returns the natural numbers `[1, 2, 3, 4, ...]`
-  * `w(i, j)` represents the value from row i, column j of the old matrix
-
-  Then the new output matrix will look like:
-
-  `[[w(1, 0), w(1, 2), 1],
-    [2,       3,       4],
-    [w(0, 0), w(0, 2), 5]]`
-
-  If we further specify that:
-
-  * `num_row_oov_buckets` == 2
-  * `num_col_oov_buckets` == 1
-
-  Then the new output matrix will look like:
-
-  `[[w(1, 0), w(1, 2), 1,  12],
-    [2,       3,       4,  13],
-    [w(0, 0), w(0, 2), 5,  14],
-    [6,       7,       8,  15],
-    [9,       10,      11, 16]]`
-
-  If `{old,new}_row_vocab_file` are None, we assume that the old and new row
-  vocab files are the same, and no row remapping is done. If
-  `{old,new}_col_vocab_file` are None, we assume that the old and new column
-  vocab files are the same, and no column remapping is done.
-
-  The returned initializer only supports div-partitioning along the row axis. It
-  does not support partitioning along the column axis or mod-partitioning.
-
-  NOTE: When this is used to warm-start variables, client code should use
-  `tf.lookup.index_table_from_tensor()` like
-  contrib/layers/python/layers/feature_column.py does, as opposed to
-  `tf.feature_to_id()` - in order to ensure the underlying lookup tables are the
-  same.
-
-  Args:
-    ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`)
-      from which the old matrix `Tensor` will be loaded.
-    old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
-    new_row_vocab_size: `int` specifying the number of entries in
-      `new_row_vocab_file`. If no row remapping is needed (no row vocab
-      provided), this should be equal to the number of rows to load from the old
-      matrix (which can theoretically be smaller than the number of rows in the
-      old matrix).
-    new_col_vocab_size: `int` specifying the number of entries in
-      `new_col_vocab_file`. If no column remapping is needed (no column vocab
-      provided), this should be equal to the number of columns in the old
-      matrix.
-    old_row_vocab_file: A scalar `Tensor` of type `string` containing the
-      path to the old row vocabulary file. Can be None, which represents no
-      remapping on the row axis.
-    new_row_vocab_file: A scalar `Tensor` of type `string` containing the path
-      to the new row vocabulary file. Can be None, which represents no remapping
-      on the row axis.
-    old_col_vocab_file: A scalar `Tensor` of type `string` containing the
-      path to the old column vocabulary file. Can be None, which represents no
-      remapping on the column axis.
-    new_col_vocab_file: A scalar `Tensor` of type `string` containing the path
-      to the new column vocabulary file. Can be None, which represents no
-      remapping on the column axis.
-    num_row_oov_buckets: `int` specifying the number of out-of-vocabulary rows
-      to append. Must be >= 0.
-    num_col_oov_buckets: `int` specifying the number of out-of-vocabulary
-      columns to append. Must be >= 0.
-    initializer: Initializer function to initialize missing values. Accepts a
-      1-D tensor as the arg to specify the shape of the returned tensor. If
-      `None`, defaults to using `zeros_initializer()`.
-    max_rows_in_memory: `int` specifying the maximum number of rows to load from
-      the checkpoint at once. If less than or equal to 0, the entire matrix will
-      be loaded into memory. Setting this arg trades increased disk reads for
-      lower memory usage.
-
-  Returns:
-    A variable initializer function that should be used to initialize a
-    (potentially partitioned) `Variable` whose complete shape is
-    `[new_row_vocab_size + num_row_oov_buckets, new_col_vocab_size +
-    num_col_oov_buckets]`.
-
-  Raises:
-    TypeError: If `initializer` is specified but not callable.
-  """
-  if initializer is None:
-    # TODO(b/25671353): Consider using sqrt(6/(fan_in + fan_out)) instead, from
-    # Glorot and Bengio, 2010.
-    initializer = init_ops.zeros_initializer()
-
-  if not callable(initializer):
-    raise TypeError(
-        "initializer must be callable, instead of being {} of type {}.".format(
-            initializer, type(initializer)))
-
-  def _initializer(shape, dtype=dtypes.float32, partition_info=None):
-    """Variable initializer.
-
-    Args:
-      shape: Shape of `Tensor` to return. Should include OOV on both axes.
-      dtype: Must be float32.
-      partition_info: variable_scope._PartitionInfo.
-
-    Returns:
-      `Tensor` of shape `shape`.
-
-    Raises:
-      TypeError: If `dtype` is anything other than float32.
-      ValueError: For shape mismatch upon invocation.
-    """
-    # Sanity checks.
-    if dtype != dtypes.float32:
-      raise TypeError(
-          "Currently, only float32 is supported. Received dtype: {}".format(
-              dtype))
-    if len(shape) != 2:
-      raise ValueError("Expected 2-dim shape, but received: {}".format(shape))
-    if shape[0] <= 0:
-      raise ValueError(
-          "Expected 1st dim of shape to be > 0, but received shape: {}".format(
-              shape))
-    if shape[1] != (new_col_vocab_size + num_col_oov_buckets):
-      raise ValueError(
-          "Expected 2nd dim of shape to be new_col_vocab_size ({}) + "
-          "num_col_oov_buckets ({}) = {}, but received shape: {}".format(
-              new_col_vocab_size, num_col_oov_buckets,
-              new_col_vocab_size + num_col_oov_buckets, shape))
-
-    offset = 0
-    if partition_info is not None:
-      offset = partition_info.single_offset(shape)
-
-    if offset + shape[0] > new_row_vocab_size + num_row_oov_buckets:
-      raise ValueError(
-          "Trying to initialize {} additional rows after {} rows have already "
-          "been initialized, which would exceed expected total row count of "
-          "new_row_vocab_size ({}) + num_row_oov_buckets ({}) = {}.".format(
-              shape[0], offset, new_row_vocab_size, num_row_oov_buckets,
-              new_row_vocab_size + num_row_oov_buckets))
-
-    row_oov_buckets_to_use = min(shape[0],
-                                 max(0, offset + shape[0] - new_row_vocab_size))
-    num_rows_to_load = shape[0] - row_oov_buckets_to_use
-
-    return _load_and_remap_matrix(
-        ckpt_path=ckpt_path,
-        old_tensor_name=old_tensor_name,
-        new_row_vocab_offset=offset,
-        num_rows_to_load=num_rows_to_load,
-        new_col_vocab_size=new_col_vocab_size,
-        initializer=initializer,
-        old_row_vocab_file=old_row_vocab_file,
-        new_row_vocab_file=new_row_vocab_file,
-        old_col_vocab_file=old_col_vocab_file,
-        new_col_vocab_file=new_col_vocab_file,
-        num_row_oov_buckets=row_oov_buckets_to_use,
-        num_col_oov_buckets=num_col_oov_buckets,
-        max_rows_in_memory=max_rows_in_memory)
-
-  return _initializer
-
-
-def load_embedding_initializer(ckpt_path,
-                               embedding_tensor_name,
-                               new_vocab_size,
-                               embedding_dim,
-                               old_vocab_file,
-                               new_vocab_file,
-                               num_oov_buckets=0,
-                               initializer=None,
-                               max_rows_in_memory=-1):
-  """Returns a variable initializer for loading pre-trained embeddings.
-
-  Wrapper around `load_and_remap_matrix_initializer()` specialized for loading
-  embedding weights and remapping according to the provided vocab files. See
-  docs for `load_and_remap_matrix_initializer()` for more details.
-
-  NOTE: Only for use with div-partitioned variables / vocabularies.
-
-  Args:
-    ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`)
-      from which the old matrix `Tensor` will be loaded.
-    embedding_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
-    new_vocab_size: Number of entries in the new vocab.
-    embedding_dim: `int` specifying the dimension of the embedding vectors from
-      the checkpoint. Must match the number of columns in the old embedding
-      matrix.
-    old_vocab_file: A scalar `Tensor` of type `string` containing the
-      path to the old vocabulary file.
-    new_vocab_file: A scalar `Tensor` of type `string` containing the
-      path to the new vocabulary file.
-    num_oov_buckets: `int` specifying the number of out-of-vocabulary
-      buckets to use. Must be >= 0.
-    initializer: Initializer function that accepts a 1-D tensor as the arg to
-      specify the shape of the returned tensor. If `None`, defaults to using
-      `truncated_normal_initializer()`.
-    max_rows_in_memory: `int` specifying the maximum number of rows to load from
-      the checkpoint at once. If less than or equal to 0, the entire matrix will
-      be loaded into memory. Setting this arg trades increased disk reads for
-      lower memory usage.
-
-  Returns:
-    A variable initializer function.
-  """
-  if initializer is None:
-    # TODO(b/25671353): This should be kept in sync with the stddev used by
-    # feature_column.py's _EmbeddingColumn.
-    initializer = init_ops.truncated_normal_initializer(
-        stddev=1.0 / math.sqrt(embedding_dim))
-
-  return load_and_remap_matrix_initializer(
-      ckpt_path=ckpt_path,
-      old_tensor_name=embedding_tensor_name,
-      new_row_vocab_size=new_vocab_size,
-      new_col_vocab_size=embedding_dim,
-      old_row_vocab_file=old_vocab_file,
-      new_row_vocab_file=new_vocab_file,
-      old_col_vocab_file=None,
-      new_col_vocab_file=None,
-      num_row_oov_buckets=num_oov_buckets,
-      num_col_oov_buckets=0,
-      initializer=initializer,
-      max_rows_in_memory=max_rows_in_memory)
+# pylint: disable=protected-access,line-too-long
+load_and_remap_matrix_initializer = checkpoint_ops._load_and_remap_matrix_initializer
+# pylint: enable=line-too-long
+load_embedding_initializer = checkpoint_ops._load_embedding_initializer
+# pylint: enable=protected-access
 
 
 def load_linear_multiclass_bias_initializer(ckpt_path,
diff --git a/tensorflow/contrib/framework/python/ops/checkpoint_ops_test.py b/tensorflow/contrib/framework/python/ops/checkpoint_ops_test.py
index a11d373244..b7b9f5c59e 100644
--- a/tensorflow/contrib/framework/python/ops/checkpoint_ops_test.py
+++ b/tensorflow/contrib/framework/python/ops/checkpoint_ops_test.py
@@ -21,7 +21,6 @@ import os
 import numpy as np
 
 from tensorflow.contrib import framework as contrib_framework
-from tensorflow.contrib.framework.python.ops import checkpoint_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -38,250 +37,6 @@ FLAGS = flags.FLAGS
 _TESTDATA_PATH = 'contrib/framework/testdata'
 
 
-class LoadAndRemapWrappersTest(test.TestCase):
-  """Tests for the functionality of the Python wrappers."""
-
-  def setUp(self):
-    self.bundle_file = os.path.join(
-        test.test_src_dir_path(_TESTDATA_PATH), 'bundle_checkpoint')
-    self.new_feature_vocab_file = os.path.join(
-        test.test_src_dir_path(_TESTDATA_PATH), 'bundle_checkpoint_vocab.txt')
-    self.old_feature_vocab_file = os.path.join(
-        test.test_src_dir_path(_TESTDATA_PATH),
-        'bundle_checkpoint_vocab_with_oov.txt')
-    self.new_class_vocab_file = os.path.join(
-        test.test_src_dir_path(_TESTDATA_PATH), 'keyword_new.txt')
-    self.old_class_vocab_file = os.path.join(
-        test.test_src_dir_path(_TESTDATA_PATH), 'keyword.txt')
-    self.init_val = 42
-
-    def _init_val_initializer(shape, dtype=None, partition_info=None):
-      del dtype, partition_info  # Unused by this unit-testing initializer.
-      return array_ops.tile(
-          constant_op.constant([[self.init_val]], dtype=dtypes.float32), shape)
-
-    self.initializer = _init_val_initializer
-
-  def test_load_and_remap_matrix(self):
-    """Tests the end-to-end loading / remapping of weights."""
-    # _load_and_remap_matrix() is the generalized wrapper that takes in row and
-    # column vocabulary files, calls the relevant remappings, and returns the
-    # weight matrix.  Take this example to be linear multi-class by providing
-    # both row and column vocabularies.
-    remapped_matrix = checkpoint_ops._load_and_remap_matrix(
-        new_row_vocab_file=self.new_feature_vocab_file,
-        old_row_vocab_file=self.old_feature_vocab_file,
-        num_rows_to_load=4,
-        new_col_vocab_file=self.new_class_vocab_file,
-        old_col_vocab_file=self.old_class_vocab_file,
-        new_col_vocab_size=4,
-        old_tensor_name='some_scope/embeddings',
-        ckpt_path=[self.bundle_file],
-        new_row_vocab_offset=1,
-        initializer=self.initializer,
-        num_row_oov_buckets=1,
-        num_col_oov_buckets=1)
-
-    # [4 in vocab + 1 oov features, 4 in vocab + 1 oov classes].  The offset
-    # means we read
-    expected_remapped_matrix = np.concatenate(
-        [
-            np.reshape([18, 34, 50, self.init_val, self.init_val], [5, 1]),
-            np.reshape([16, 32, 48, self.init_val, self.init_val], [5, 1]),
-            np.reshape([self.init_val] * 5, [5, 1]),
-            np.reshape([17, 33, 49, self.init_val, self.init_val], [5, 1]),
-            np.reshape([self.init_val] * 5, [5, 1])
-        ],
-        axis=1)
-
-    with self.test_session():
-      self.assertAllClose(expected_remapped_matrix, remapped_matrix.eval())
-
-  def test_load_and_remap_output_layer_weight_initializer_linear(self):
-    """Tests for the output layer initializer in the linear multi-class case."""
-    loading_initializer = (contrib_framework.load_and_remap_matrix_initializer(
-        new_row_vocab_size=5,
-        new_col_vocab_file=self.new_class_vocab_file,
-        old_col_vocab_file=self.old_class_vocab_file,
-        new_col_vocab_size=4,
-        old_tensor_name='some_scope/embeddings',
-        ckpt_path=[self.bundle_file],
-        new_row_vocab_file=self.new_feature_vocab_file,
-        old_row_vocab_file=self.old_feature_vocab_file,
-        num_row_oov_buckets=1,
-        num_col_oov_buckets=1,
-        initializer=self.initializer))
-
-    expected_remapped_matrix = np.concatenate(
-        [
-            np.reshape([2, 18, 34, 50, self.init_val, self.init_val], [6, 1]),
-            np.reshape([0, 16, 32, 48, self.init_val, self.init_val], [6, 1]),
-            np.reshape([self.init_val] * 6, [6, 1]),
-            np.reshape([1, 17, 33, 49, self.init_val, self.init_val], [6, 1]),
-            np.reshape([self.init_val] * 6, [6, 1])
-        ],
-        axis=1)
-
-    # The new weight matrix is of size
-    # [5 feature vocab + 1 feature OOV, 4 class vocab + 1 class OOV].  Use a
-    # partitioned variable to confirm that the offset logic works.
-    remapped_matrix = variable_scope.get_variable(
-        name='linear/obtained_weight_matrix',
-        shape=[6, 5],
-        initializer=loading_initializer,
-        partitioner=partitioned_variables.fixed_size_partitioner(2))
-
-    with self.test_session():
-      variables.global_variables_initializer().run()
-      self.assertAllClose(expected_remapped_matrix,
-                          remapped_matrix.as_tensor().eval())
-
-  def test_load_and_remap_output_layer_weight_initializer_dnn_output(self):
-    """Tests for the output layer initializer in the DNN output case."""
-    loading_initializer = (contrib_framework.load_and_remap_matrix_initializer(
-        new_row_vocab_size=5,
-        new_col_vocab_file=self.new_class_vocab_file,
-        old_col_vocab_file=self.old_class_vocab_file,
-        new_col_vocab_size=4,
-        old_tensor_name='some_scope/embeddings',
-        ckpt_path=[self.bundle_file],
-        num_col_oov_buckets=1,
-        initializer=self.initializer))
-
-    expected_remapped_matrix = np.concatenate(
-        [
-            np.reshape([2, 18, 34, 50, 66], [5, 1]),
-            np.reshape([0, 16, 32, 48, 64], [5, 1]),
-            np.reshape([self.init_val] * 5, [5, 1]),
-            np.reshape([1, 17, 33, 49, 65], [5, 1]),
-            np.reshape([self.init_val] * 5, [5, 1])
-        ],
-        axis=1)
-
-    # The new weight matrix is of size
-    # [5-sized input layer, 4 class vocab + 1 class OOV].
-    remapped_matrix = variable_scope.get_variable(
-        name='dnn_output/obtained_weight_matrix',
-        shape=[5, 5],
-        initializer=loading_initializer,
-        partitioner=partitioned_variables.fixed_size_partitioner(2))
-
-    with self.test_session():
-      variables.global_variables_initializer().run()
-      self.assertAllClose(expected_remapped_matrix,
-                          remapped_matrix.as_tensor().eval())
-
-  def test_initializer_with_oov_only_partition(self):
-    """Tests for the output layer initializer where one partition is all OOV."""
-    loading_initializer = (contrib_framework.load_and_remap_matrix_initializer(
-        new_row_vocab_size=5,
-        new_col_vocab_file=self.new_class_vocab_file,
-        old_col_vocab_file=self.old_class_vocab_file,
-        new_col_vocab_size=4,
-        old_tensor_name='some_scope/embeddings',
-        ckpt_path=[self.bundle_file],
-        new_row_vocab_file=self.new_feature_vocab_file,
-        old_row_vocab_file=self.old_feature_vocab_file,
-        num_row_oov_buckets=5,
-        num_col_oov_buckets=1,
-        initializer=self.initializer))
-
-    expected_remapped_matrix = np.concatenate(
-        [
-            np.reshape([2, 18, 34, 50] + [self.init_val] * 6, [10, 1]),
-            np.reshape([0, 16, 32, 48] + [self.init_val] * 6, [10, 1]),
-            np.reshape([self.init_val] * 10, [10, 1]),
-            np.reshape([1, 17, 33, 49] + [self.init_val] * 6, [10, 1]),
-            np.reshape([self.init_val] * 10, [10, 1]),
-        ],
-        axis=1)
-
-    # The new weight matrix is of size
-    # [5 feature vocab + 5 feature OOV, 4 class vocab + 1 class OOV].  The
-    # second partition has only OOV.
-    remapped_matrix = variable_scope.get_variable(
-        name='linear_all_oov/obtained_weight_matrix',
-        shape=[10, 5],
-        initializer=loading_initializer,
-        partitioner=partitioned_variables.fixed_size_partitioner(2))
-
-    with self.test_session():
-      variables.global_variables_initializer().run()
-      self.assertAllClose(expected_remapped_matrix,
-                          remapped_matrix.as_tensor().eval())
-
-  def test_load_and_remap_linear_multiclass_initializer_default_init(self):
-    """Tests where the zeros_initializer default is used for linear."""
-    loading_initializer = (contrib_framework.load_and_remap_matrix_initializer(
-        new_row_vocab_size=5,
-        new_col_vocab_file=self.new_class_vocab_file,
-        old_col_vocab_file=self.old_class_vocab_file,
-        new_col_vocab_size=4,
-        old_tensor_name='some_scope/embeddings',
-        ckpt_path=[self.bundle_file],
-        new_row_vocab_file=self.new_feature_vocab_file,
-        old_row_vocab_file=self.old_feature_vocab_file,
-        num_row_oov_buckets=1,
-        num_col_oov_buckets=1))
-
-    expected_remapped_matrix = np.concatenate(
-        [
-            np.reshape([2, 18, 34, 50, 0, 0], [6, 1]),
-            np.reshape([0, 16, 32, 48, 0, 0], [6, 1]),
-            np.reshape([0] * 6, [6, 1]),
-            np.reshape([1, 17, 33, 49, 0, 0], [6, 1]),
-            np.reshape([0] * 6, [6, 1])
-        ],
-        axis=1)
-
-    remapped_matrix = variable_scope.get_variable(
-        name='linear_init_fallback/obtained_weight_matrix',
-        shape=[6, 5],
-        initializer=loading_initializer,
-        partitioner=partitioned_variables.fixed_size_partitioner(2))
-
-    with self.test_session():
-      variables.global_variables_initializer().run()
-      self.assertAllClose(expected_remapped_matrix,
-                          remapped_matrix.as_tensor().eval())
-
-  def test_load_embedding_initializer(self):
-    """Tests for the load_embedding_initializer wrapper."""
-    embedding_loading_initializer = (
-        contrib_framework.load_embedding_initializer(
-            new_vocab_file=self.new_feature_vocab_file,
-            old_vocab_file=self.old_feature_vocab_file,
-            new_vocab_size=5,
-            embedding_dim=16,
-            embedding_tensor_name='some_scope/embeddings',
-            ckpt_path=[self.bundle_file],
-            num_oov_buckets=1,
-            initializer=self.initializer))
-
-    expected_remapped_embeddings = np.concatenate(
-        [
-            np.reshape(range(64), [4, 16]),
-            np.reshape([self.init_val] * 32, [2, 16]),
-        ],
-        axis=0)
-
-    # The new weight matrix is of size
-    # [5 feature vocab + 1 feature OOV, 16 (embedding dimension)], where the
-    # last vocab row (2nd last row) is newly initialized (wasn't found in
-    # previous vocab) and the actual last row is OOV and also newly initialized.
-    # Use a partitioned variable to confirm that the offset logic works.
-    remapped_embeddings = variable_scope.get_variable(
-        name='embedding/obtained_embedding_matrix',
-        shape=[6, 16],
-        initializer=embedding_loading_initializer,
-        partitioner=partitioned_variables.fixed_size_partitioner(2))
-
-    with self.test_session():
-      variables.global_variables_initializer().run()
-      self.assertAllClose(expected_remapped_embeddings,
-                          remapped_embeddings.as_tensor().eval())
-
-
 class LoadMulticlassBiasTest(test.TestCase):
   """Tests for the load_linear_multiclass_bias_initializer functionality."""
 
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 6597889fbc..b75f79fbf4 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -2550,6 +2550,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":array_ops",
+        ":checkpoint_ops_gen",
         ":client",
         ":control_flow_ops",
         ":data_flow_ops",
@@ -3573,6 +3574,28 @@ py_test(
     ],
 )
 
+py_test(
+    name = "checkpoint_ops_test",
+    size = "small",
+    srcs = ["training/checkpoint_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_windows"],
+    deps = [
+        ":checkpoint_ops_gen",
+        ":client",
+        ":client_testlib",
+        ":framework_for_generated_wrappers",
+        ":io_ops",
+        ":partitioned_variables",
+        ":platform",
+        ":pywrap_tensorflow",
+        ":state_ops",
+        ":training",
+        ":variable_scope",
+        ":variables",
+    ],
+)
+
 py_test(
     name = "monitored_session_test",
     size = "small",
diff --git a/tensorflow/python/training/checkpoint_ops.py b/tensorflow/python/training/checkpoint_ops.py
new file mode 100644
index 0000000000..70460ceb48
--- /dev/null
+++ b/tensorflow/python/training/checkpoint_ops.py
@@ -0,0 +1,453 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Operations for generating and loading vocab remappings."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_checkpoint_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
+
+ops.NotDifferentiable("GenerateVocabRemapping")
+ops.NotDifferentiable("LoadAndRemapMatrix")
+
+
+def _load_and_remap_matrix(ckpt_path,
+                           old_tensor_name,
+                           new_row_vocab_offset,
+                           num_rows_to_load,
+                           new_col_vocab_size,
+                           initializer,
+                           old_row_vocab_file=None,
+                           new_row_vocab_file=None,
+                           old_col_vocab_file=None,
+                           new_col_vocab_file=None,
+                           num_row_oov_buckets=0,
+                           num_col_oov_buckets=0,
+                           max_rows_in_memory=-1):
+  """Loads a 2-D (matrix) `Tensor` from checkpoint.
+
+  Generates 1D-remappings for rows and columns using the
+  `GenerateVocabRemapping` op, and initializes any anticipated values with the
+  provided initializer. Then, uses the `LoadAndRemapMatrix` op to create a
+  matrix that loads existing values from the checkpoint, while filling out
+  "missing" values with the newly initialized values. See
+  contrib/framework/ops/checkpoint_ops.cc for more information on the wrapped
+  functionality (LoadAndRemapMatrix). This wrapper can be used to perform only
+  row remapping or only col remapping. If only row remapping is desired,
+  {new,old}_col_vocab_file should be `None`, and vice versa for column
+  remapping.
+
+  NOTE: This only supports div-partitioning the vocabulary on the 1st dimension
+  (row axis) via `new_row_vocab_offset`.
+
+  Args:
+    ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`)
+      from which the old matrix `Tensor` will be loaded.
+    old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
+    new_row_vocab_offset: A 0-indexed integer representing what line to
+      start reading at in the new row vocabulary. Used for partitioned
+      variables.
+    num_rows_to_load: Number of rows to load for the new vocabulary (note: to
+      support variable partitioning and partial loading, this does not need to
+      be the same as the number of entries in `new_row_vocab_file`).
+    new_col_vocab_size: Number of columns to load - should be the same as the
+      number of entries in `new_col_vocab_file`, since we don't support
+      partitioning along the column axis.
+    initializer: Callable initializer function that accepts a 1-D tensor as the
+      arg to specify the shape of the returned tensor. Used to initialize
+      missing values.
+    old_row_vocab_file: A scalar `Tensor` of type `string` containing the
+      path to the old row vocabulary file. Can be None, which represents no
+      remapping on the row axis.
+    new_row_vocab_file: A scalar `Tensor` of type `string` containing the path
+      to the new row vocabulary file. Can be None, which represents no remapping
+      on the row axis - in which case, `new_row_vocab_offset` and
+      `num_rows_to_load` work under the assumption that the new row vocab is the
+      same as the old row vocab.
+    old_col_vocab_file: A scalar `Tensor` of type `string` containing the
+      path to the old column vocabulary file. Can be None, which represents no
+      remapping on the column axis.
+    new_col_vocab_file: A scalar `Tensor` of type `string` containing the path
+      to the new column vocabulary file. Can be None, which represents no
+      remapping on the column axis - in which case, `new_col_vocab_size` works
+      under the assumption that the new col vocab is the same as the old col
+      vocab.
+    num_row_oov_buckets: `int` specifying the number of out-of-vocabulary rows
+      to append. Must be >= 0.
+    num_col_oov_buckets: `int` specifying the number of out-of-vocabulary
+      columns to append. Must be >= 0.
+    max_rows_in_memory: `int` specifying the maximum number of rows to load from
+      the checkpoint at once. If less than or equal to 0, the entire matrix will
+      be loaded into memory. Setting this arg trades increased disk reads for
+      lower memory usage.
+
+  Returns:
+    A Tensor of shape `[num_rows_to_load + num_row_oov_buckets,
+    new_col_vocab_size + num_col_oov_buckets]`, with values loaded from the
+    specified tensor in the checkpoint, and any missing or OOV values
+    initialized with the given `initializer`.
+
+  Raises:
+    ValueError: If `num_row_oov_buckets` or `num_col_oov_buckets` < 0.
+    ValueError: If either `old_row_vocab_file` or `new_row_vocab_file` is
+      provided, while the other is not. Same for `old_col_vocab_file` and
+      `new_col_vocab_file`.
+    ValueError: If neither row vocabs or col vocabs are provided.
+  """
+  if num_row_oov_buckets < 0:
+    raise ValueError("num_row_oov_buckets must be >= 0, but received %d" %
+                     num_row_oov_buckets)
+  if num_col_oov_buckets < 0:
+    raise ValueError("num_col_oov_buckets must be >= 0, but received %d" %
+                     num_col_oov_buckets)
+
+  if bool(old_row_vocab_file) != bool(new_row_vocab_file):
+    raise ValueError(
+        "old_row_vocab_file and new_row_vocab_file must both be specified or "
+        "left unspecified. old_row_vocab_file='{}', new_row_vocab_file='{}'".
+        format(old_row_vocab_file, new_row_vocab_file))
+  if bool(old_col_vocab_file) != bool(new_col_vocab_file):
+    raise ValueError(
+        "old_col_vocab_file and new_col_vocab_file must both be specified or "
+        "left unspecified. old_col_vocab_file='{}', new_col_vocab_file='{}'".
+        format(old_col_vocab_file, new_col_vocab_file))
+
+  remap_rows = new_row_vocab_file and old_row_vocab_file
+  remap_cols = new_col_vocab_file and old_col_vocab_file
+  if not (remap_rows or remap_cols):
+    raise ValueError(
+        "Must provide either row or column vocab files. If no remapping is "
+        "necessary, consider using `tf.contrib.framework.init_from_checkpoint` "
+        "instead.")
+
+  num_rows_present = num_rows_to_load
+  if remap_rows:
+    row_remapping, num_rows_present = (
+        gen_checkpoint_ops._generate_vocab_remapping(  # pylint: disable=protected-access
+            new_vocab_file=new_row_vocab_file,
+            old_vocab_file=old_row_vocab_file,
+            new_vocab_offset=new_row_vocab_offset,
+            num_new_vocab=num_rows_to_load))
+  else:
+    # Even when the rows are not being reordered, we still need to generate a
+    # remapping to account for initializing partitioned Variables (when
+    # new_row_vocab_offset is non-zero).
+    row_remapping = math_ops.range(
+        new_row_vocab_offset,
+        new_row_vocab_offset + num_rows_to_load,
+        dtype=dtypes.int64)
+
+  col_remapping = []
+  num_cols_present = new_col_vocab_size
+  if remap_cols:
+    col_remapping, num_cols_present = (
+        gen_checkpoint_ops._generate_vocab_remapping(  # pylint: disable=protected-access
+            new_vocab_file=new_col_vocab_file,
+            old_vocab_file=old_col_vocab_file,
+            new_vocab_offset=0,  # Offset is unused for cols (no partitioning).
+            num_new_vocab=new_col_vocab_size))
+
+  init_vals = initializer([
+      num_rows_to_load * new_col_vocab_size -
+      num_rows_present * num_cols_present, 1
+  ])
+  return_tensor = gen_checkpoint_ops._load_and_remap_matrix(  # pylint: disable=protected-access
+      ckpt_path=ckpt_path,
+      old_tensor_name=old_tensor_name,
+      row_remapping=row_remapping,
+      col_remapping=col_remapping,
+      initializing_values=init_vals,
+      num_rows=num_rows_to_load,
+      num_cols=new_col_vocab_size,
+      max_rows_in_memory=max_rows_in_memory)
+
+  # Add OOV row(s) and column(s).
+  if num_row_oov_buckets > 0:
+    init_row_oov_val = initializer([num_row_oov_buckets, new_col_vocab_size])
+    init_row_oov_val = ops.convert_to_tensor(init_row_oov_val)
+    return_tensor = array_ops.concat([return_tensor, init_row_oov_val], 0)
+  if num_col_oov_buckets > 0:
+    # We need to add any row OOV to the new column shape.
+    init_col_oov_val = initializer(
+        [num_rows_to_load + num_row_oov_buckets, num_col_oov_buckets])
+    init_col_oov_val = ops.convert_to_tensor(init_col_oov_val)
+    return_tensor = array_ops.concat([return_tensor, init_col_oov_val], 1)
+
+  return return_tensor
+
+
+def _load_and_remap_matrix_initializer(ckpt_path,
+                                       old_tensor_name,
+                                       new_row_vocab_size,
+                                       new_col_vocab_size,
+                                       old_row_vocab_file=None,
+                                       new_row_vocab_file=None,
+                                       old_col_vocab_file=None,
+                                       new_col_vocab_file=None,
+                                       num_row_oov_buckets=0,
+                                       num_col_oov_buckets=0,
+                                       initializer=None,
+                                       max_rows_in_memory=-1):
+  r"""Returns a var initializer for loading and remapping a 2-D (matrix) tensor.
+
+  The returned initializer loads a 2-D (matrix) `Tensor` with name
+  `old_tensor_name` from the checkpoint at `ckpt_path`. It will reorder the
+  rows/columns according to the specified vocab files and append additional
+  out-of-vocabulary rows/columns according to the number of OOV buckets.
+
+  The format of the file at the `{old,new}_{row,col}_vocab_file` path should be
+  a text file, with each line containing a single entity within the vocabulary.
+  Let the function `line_of(f, "x")` return the 0-indexed line number of the
+  entity "x" in file f, and the function `entity_at(f, i)` return the entity at
+  line i of file f. Then, row i of the new output matrix will be taken from row
+  `line_of(old_row_vocab_file, entity_at(new_row_vocab_file, i))` of the old
+  matrix. If any entity in `new_row_vocab_file` is not found in
+  `old_row_vocab_file`, that row is considered a "missing" row, and its values
+  will be initialized using the `initializer` arg. The same logic also applies
+  for the columns.
+
+  For example, assuming that:
+
+  * `old_row_vocab_file` contains "mercury\nvenus\nmars"
+  * `new_row_vocab_file` contains "venus\njupiter\nmercury"
+  * `old_col_vocab_file` contains "good\nbetter\nbest"
+  * `new_col_vocab_file` contains "good\nbest\nfantastic"
+  * `initializer` returns the natural numbers `[1, 2, 3, 4, ...]`
+  * `w(i, j)` represents the value from row i, column j of the old matrix
+
+  Then the new output matrix will look like:
+
+  `[[w(1, 0), w(1, 2), 1],
+    [2,       3,       4],
+    [w(0, 0), w(0, 2), 5]]`
+
+  If we further specify that:
+
+  * `num_row_oov_buckets` == 2
+  * `num_col_oov_buckets` == 1
+
+  Then the new output matrix will look like:
+
+  `[[w(1, 0), w(1, 2), 1,  12],
+    [2,       3,       4,  13],
+    [w(0, 0), w(0, 2), 5,  14],
+    [6,       7,       8,  15],
+    [9,       10,      11, 16]]`
+
+  If `{old,new}_row_vocab_file` are None, we assume that the old and new row
+  vocab files are the same, and no row remapping is done. If
+  `{old,new}_col_vocab_file` are None, we assume that the old and new column
+  vocab files are the same, and no column remapping is done.
+
+  The returned initializer only supports div-partitioning along the row axis. It
+  does not support partitioning along the column axis or mod-partitioning.
+
+  NOTE: When this is used to warm-start variables, client code should use
+  `tf.lookup.index_table_from_tensor()` like
+  contrib/layers/python/layers/feature_column.py does, as opposed to
+  `tf.feature_to_id()` - in order to ensure the underlying lookup tables are the
+  same.
+
+  Args:
+    ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`)
+      from which the old matrix `Tensor` will be loaded.
+    old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
+    new_row_vocab_size: `int` specifying the number of entries in
+      `new_row_vocab_file`. If no row remapping is needed (no row vocab
+      provided), this should be equal to the number of rows to load from the old
+      matrix (which can theoretically be smaller than the number of rows in the
+      old matrix).
+    new_col_vocab_size: `int` specifying the number of entries in
+      `new_col_vocab_file`. If no column remapping is needed (no column vocab
+      provided), this should be equal to the number of columns in the old
+      matrix.
+    old_row_vocab_file: A scalar `Tensor` of type `string` containing the
+      path to the old row vocabulary file. Can be None, which represents no
+      remapping on the row axis.
+    new_row_vocab_file: A scalar `Tensor` of type `string` containing the path
+      to the new row vocabulary file. Can be None, which represents no remapping
+      on the row axis.
+    old_col_vocab_file: A scalar `Tensor` of type `string` containing the
+      path to the old column vocabulary file. Can be None, which represents no
+      remapping on the column axis.
+    new_col_vocab_file: A scalar `Tensor` of type `string` containing the path
+      to the new column vocabulary file. Can be None, which represents no
+      remapping on the column axis.
+    num_row_oov_buckets: `int` specifying the number of out-of-vocabulary rows
+      to append. Must be >= 0.
+    num_col_oov_buckets: `int` specifying the number of out-of-vocabulary
+      columns to append. Must be >= 0.
+    initializer: Initializer function to initialize missing values. Accepts a
+      1-D tensor as the arg to specify the shape of the returned tensor. If
+      `None`, defaults to using `zeros_initializer()`.
+    max_rows_in_memory: `int` specifying the maximum number of rows to load from
+      the checkpoint at once. If less than or equal to 0, the entire matrix will
+      be loaded into memory. Setting this arg trades increased disk reads for
+      lower memory usage.
+
+  Returns:
+    A variable initializer function that should be used to initialize a
+    (potentially partitioned) `Variable` whose complete shape is
+    `[new_row_vocab_size + num_row_oov_buckets, new_col_vocab_size +
+    num_col_oov_buckets]`.
+
+  Raises:
+    TypeError: If `initializer` is specified but not callable.
+  """
+  if initializer is None:
+    # TODO(b/25671353): Consider using sqrt(6/(fan_in + fan_out)) instead, from
+    # Glorot and Bengio, 2010.
+    initializer = init_ops.zeros_initializer()
+
+  if not callable(initializer):
+    raise TypeError(
+        "initializer must be callable, instead of being {} of type {}.".format(
+            initializer, type(initializer)))
+
+  def _initializer(shape, dtype=dtypes.float32, partition_info=None):
+    """Variable initializer.
+
+    Args:
+      shape: Shape of `Tensor` to return. Should include OOV on both axes.
+      dtype: Must be float32.
+      partition_info: variable_scope._PartitionInfo.
+
+    Returns:
+      `Tensor` of shape `shape`.
+
+    Raises:
+      TypeError: If `dtype` is anything other than float32.
+      ValueError: For shape mismatch upon invocation.
+    """
+    # Sanity checks.
+    if dtype != dtypes.float32:
+      raise TypeError(
+          "Currently, only float32 is supported. Received dtype: {}".format(
+              dtype))
+    if len(shape) != 2:
+      raise ValueError("Expected 2-dim shape, but received: {}".format(shape))
+    if shape[0] <= 0:
+      raise ValueError(
+          "Expected 1st dim of shape to be > 0, but received shape: {}".format(
+              shape))
+    if shape[1] != (new_col_vocab_size + num_col_oov_buckets):
+      raise ValueError(
+          "Expected 2nd dim of shape to be new_col_vocab_size ({}) + "
+          "num_col_oov_buckets ({}) = {}, but received shape: {}".format(
+              new_col_vocab_size, num_col_oov_buckets,
+              new_col_vocab_size + num_col_oov_buckets, shape))
+
+    offset = 0
+    if partition_info is not None:
+      offset = partition_info.single_offset(shape)
+
+    if offset + shape[0] > new_row_vocab_size + num_row_oov_buckets:
+      raise ValueError(
+          "Trying to initialize {} additional rows after {} rows have already "
+          "been initialized, which would exceed expected total row count of "
+          "new_row_vocab_size ({}) + num_row_oov_buckets ({}) = {}.".format(
+              shape[0], offset, new_row_vocab_size, num_row_oov_buckets,
+              new_row_vocab_size + num_row_oov_buckets))
+
+    row_oov_buckets_to_use = min(shape[0],
+                                 max(0, offset + shape[0] - new_row_vocab_size))
+    num_rows_to_load = shape[0] - row_oov_buckets_to_use
+
+    return _load_and_remap_matrix(
+        ckpt_path=ckpt_path,
+        old_tensor_name=old_tensor_name,
+        new_row_vocab_offset=offset,
+        num_rows_to_load=num_rows_to_load,
+        new_col_vocab_size=new_col_vocab_size,
+        initializer=initializer,
+        old_row_vocab_file=old_row_vocab_file,
+        new_row_vocab_file=new_row_vocab_file,
+        old_col_vocab_file=old_col_vocab_file,
+        new_col_vocab_file=new_col_vocab_file,
+        num_row_oov_buckets=row_oov_buckets_to_use,
+        num_col_oov_buckets=num_col_oov_buckets,
+        max_rows_in_memory=max_rows_in_memory)
+
+  return _initializer
+
+
+def _load_embedding_initializer(ckpt_path,
+                                embedding_tensor_name,
+                                new_vocab_size,
+                                embedding_dim,
+                                old_vocab_file,
+                                new_vocab_file,
+                                num_oov_buckets=0,
+                                initializer=None,
+                                max_rows_in_memory=-1):
+  """Returns a variable initializer for loading pre-trained embeddings.
+
+  Wrapper around `load_and_remap_matrix_initializer()` specialized for loading
+  embedding weights and remapping according to the provided vocab files. See
+  docs for `load_and_remap_matrix_initializer()` for more details.
+
+  NOTE: Only for use with div-partitioned variables / vocabularies.
+
+  Args:
+    ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`)
+      from which the old matrix `Tensor` will be loaded.
+    embedding_tensor_name: Name of the 2-D `Tensor` to load from checkpoint.
+    new_vocab_size: Number of entries in the new vocab.
+    embedding_dim: `int` specifying the dimension of the embedding vectors from
+      the checkpoint. Must match the number of columns in the old embedding
+      matrix.
+    old_vocab_file: A scalar `Tensor` of type `string` containing the
+      path to the old vocabulary file.
+    new_vocab_file: A scalar `Tensor` of type `string` containing the
+      path to the new vocabulary file.
+    num_oov_buckets: `int` specifying the number of out-of-vocabulary
+      buckets to use. Must be >= 0.
+    initializer: Initializer function that accepts a 1-D tensor as the arg to
+      specify the shape of the returned tensor. If `None`, defaults to using
+      `truncated_normal_initializer()`.
+    max_rows_in_memory: `int` specifying the maximum number of rows to load from
+      the checkpoint at once. If less than or equal to 0, the entire matrix will
+      be loaded into memory. Setting this arg trades increased disk reads for
+      lower memory usage.
+
+  Returns:
+    A variable initializer function.
+  """
+  if initializer is None:
+    # TODO(b/25671353): This should be kept in sync with the stddev used by
+    # feature_column.py's _EmbeddingColumn.
+    initializer = init_ops.truncated_normal_initializer(
+        stddev=1.0 / math.sqrt(embedding_dim))
+
+  return _load_and_remap_matrix_initializer(
+      ckpt_path=ckpt_path,
+      old_tensor_name=embedding_tensor_name,
+      new_row_vocab_size=new_vocab_size,
+      new_col_vocab_size=embedding_dim,
+      old_row_vocab_file=old_vocab_file,
+      new_row_vocab_file=new_vocab_file,
+      old_col_vocab_file=None,
+      new_col_vocab_file=None,
+      num_row_oov_buckets=num_oov_buckets,
+      num_col_oov_buckets=0,
+      initializer=initializer,
+      max_rows_in_memory=max_rows_in_memory)
diff --git a/tensorflow/python/training/checkpoint_ops_test.py b/tensorflow/python/training/checkpoint_ops_test.py
new file mode 100644
index 0000000000..39c4d2911f
--- /dev/null
+++ b/tensorflow/python/training/checkpoint_ops_test.py
@@ -0,0 +1,305 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for Python wrappers around warm-starting."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import partitioned_variables
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+from tensorflow.python.training import checkpoint_ops
+from tensorflow.python.training import saver as saver_lib
+
+
+class LoadAndRemapWrappersTest(test.TestCase):
+  """Tests for the functionality of the Python wrappers."""
+
+  def setUp(self):
+    ops.reset_default_graph()
+    # Create the checkpoint file in a temporary directory.
+    checkpoint_prefix = os.path.join(self.get_temp_dir(), 'model')
+    # 0., 1., ..., 79. reshaped into [5, 16].
+    initializer = init_ops.constant_initializer(
+        np.reshape(np.linspace(0.0, 79, 5 * 16), (5, 16)))
+    with self.test_session() as sess:
+      with variable_scope.variable_scope('some_scope'):
+        variable_scope.get_variable(name='embeddings', shape=[5, 16],
+                                    initializer=initializer)
+      sess.run(variables.global_variables_initializer())
+      saver = saver_lib.Saver()
+      saver.save(sess, checkpoint_prefix, global_step=5)
+    self.checkpoint_file = '{}-5'.format(checkpoint_prefix)
+
+    # Create the vocabulary files.
+    self.new_feature_vocab_file = os.path.join(
+        self.get_temp_dir(), 'new_feature_vocab.txt')
+    with open(self.new_feature_vocab_file, 'w') as f:
+      f.write('\n'.join(['zero', 'one', 'two', 'three', 'four']) + '\n')
+
+    self.old_feature_vocab_file = os.path.join(
+        self.get_temp_dir(), 'old_feature_vocab.txt')
+    with open(self.old_feature_vocab_file, 'w') as f:
+      f.write('\n'.join(['zero', 'one', 'two', 'three']) + '\n')
+
+    self.new_class_vocab_file = os.path.join(
+        self.get_temp_dir(), 'new_class_vocab.txt')
+    with open(self.new_class_vocab_file, 'w') as f:
+      f.write('\n'.join(['MISSING', 'knitting', 'flask', 'eminem']) + '\n')
+
+    self.old_class_vocab_file = os.path.join(
+        self.get_temp_dir(), 'old_class_vocab.txt')
+    with open(self.old_class_vocab_file, 'w') as f:
+      f.write('\n'.join(['knitting', 'eminem', 'MISSING']) + '\n')
+
+    self.init_val = 42
+
+    def _init_val_initializer(shape, dtype=None, partition_info=None):
+      del dtype, partition_info  # Unused by this unit-testing initializer.
+      return array_ops.tile(
+          constant_op.constant([[self.init_val]], dtype=dtypes.float32), shape)
+
+    self.initializer = _init_val_initializer
+
+  def test_load_and_remap_matrix(self):
+    """Tests the end-to-end loading / remapping of weights."""
+    # _load_and_remap_matrix() is the generalized wrapper that takes in row and
+    # column vocabulary files, calls the relevant remappings, and returns the
+    # weight matrix.  Take this example to be linear multi-class by providing
+    # both row and column vocabularies.
+    remapped_matrix = checkpoint_ops._load_and_remap_matrix(
+        new_row_vocab_file=self.new_feature_vocab_file,
+        old_row_vocab_file=self.old_feature_vocab_file,
+        num_rows_to_load=4,
+        new_col_vocab_file=self.new_class_vocab_file,
+        old_col_vocab_file=self.old_class_vocab_file,
+        new_col_vocab_size=4,
+        old_tensor_name='some_scope/embeddings',
+        ckpt_path=[self.checkpoint_file],
+        new_row_vocab_offset=1,
+        initializer=self.initializer,
+        num_row_oov_buckets=1,
+        num_col_oov_buckets=1)
+
+    # [4 in vocab + 1 oov features, 4 in vocab + 1 oov classes].  The offset
+    # means we read
+    expected_remapped_matrix = np.concatenate(
+        [
+            np.reshape([18, 34, 50, self.init_val, self.init_val], [5, 1]),
+            np.reshape([16, 32, 48, self.init_val, self.init_val], [5, 1]),
+            np.reshape([self.init_val] * 5, [5, 1]),
+            np.reshape([17, 33, 49, self.init_val, self.init_val], [5, 1]),
+            np.reshape([self.init_val] * 5, [5, 1])
+        ],
+        axis=1)
+
+    with self.test_session():
+      self.assertAllClose(expected_remapped_matrix, remapped_matrix.eval())
+
+  def test_load_and_remap_output_layer_weight_initializer_linear(self):
+    """Tests for the output layer initializer in the linear multi-class case."""
+    loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer(
+        new_row_vocab_size=5,
+        new_col_vocab_file=self.new_class_vocab_file,
+        old_col_vocab_file=self.old_class_vocab_file,
+        new_col_vocab_size=4,
+        old_tensor_name='some_scope/embeddings',
+        ckpt_path=[self.checkpoint_file],
+        new_row_vocab_file=self.new_feature_vocab_file,
+        old_row_vocab_file=self.old_feature_vocab_file,
+        num_row_oov_buckets=1,
+        num_col_oov_buckets=1,
+        initializer=self.initializer))
+
+    expected_remapped_matrix = np.concatenate(
+        [
+            np.reshape([2, 18, 34, 50, self.init_val, self.init_val], [6, 1]),
+            np.reshape([0, 16, 32, 48, self.init_val, self.init_val], [6, 1]),
+            np.reshape([self.init_val] * 6, [6, 1]),
+            np.reshape([1, 17, 33, 49, self.init_val, self.init_val], [6, 1]),
+            np.reshape([self.init_val] * 6, [6, 1])
+        ],
+        axis=1)
+
+    # The new weight matrix is of size
+    # [5 feature vocab + 1 feature OOV, 4 class vocab + 1 class OOV].  Use a
+    # partitioned variable to confirm that the offset logic works.
+    remapped_matrix = variable_scope.get_variable(
+        name='linear/obtained_weight_matrix',
+        shape=[6, 5],
+        initializer=loading_initializer,
+        partitioner=partitioned_variables.fixed_size_partitioner(2))
+
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      self.assertAllClose(expected_remapped_matrix,
+                          remapped_matrix.as_tensor().eval())
+
+  def test_load_and_remap_output_layer_weight_initializer_dnn_output(self):
+    """Tests for the output layer initializer in the DNN output case."""
+    loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer(
+        new_row_vocab_size=5,
+        new_col_vocab_file=self.new_class_vocab_file,
+        old_col_vocab_file=self.old_class_vocab_file,
+        new_col_vocab_size=4,
+        old_tensor_name='some_scope/embeddings',
+        ckpt_path=[self.checkpoint_file],
+        num_col_oov_buckets=1,
+        initializer=self.initializer))
+
+    expected_remapped_matrix = np.concatenate(
+        [
+            np.reshape([2, 18, 34, 50, 66], [5, 1]),
+            np.reshape([0, 16, 32, 48, 64], [5, 1]),
+            np.reshape([self.init_val] * 5, [5, 1]),
+            np.reshape([1, 17, 33, 49, 65], [5, 1]),
+            np.reshape([self.init_val] * 5, [5, 1])
+        ],
+        axis=1)
+
+    # The new weight matrix is of size
+    # [5-sized input layer, 4 class vocab + 1 class OOV].
+    remapped_matrix = variable_scope.get_variable(
+        name='dnn_output/obtained_weight_matrix',
+        shape=[5, 5],
+        initializer=loading_initializer,
+        partitioner=partitioned_variables.fixed_size_partitioner(2))
+
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      self.assertAllClose(expected_remapped_matrix,
+                          remapped_matrix.as_tensor().eval())
+
+  def test_initializer_with_oov_only_partition(self):
+    """Tests for the output layer initializer where one partition is all OOV."""
+    loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer(
+        new_row_vocab_size=5,
+        new_col_vocab_file=self.new_class_vocab_file,
+        old_col_vocab_file=self.old_class_vocab_file,
+        new_col_vocab_size=4,
+        old_tensor_name='some_scope/embeddings',
+        ckpt_path=[self.checkpoint_file],
+        new_row_vocab_file=self.new_feature_vocab_file,
+        old_row_vocab_file=self.old_feature_vocab_file,
+        num_row_oov_buckets=5,
+        num_col_oov_buckets=1,
+        initializer=self.initializer))
+
+    expected_remapped_matrix = np.concatenate(
+        [
+            np.reshape([2, 18, 34, 50] + [self.init_val] * 6, [10, 1]),
+            np.reshape([0, 16, 32, 48] + [self.init_val] * 6, [10, 1]),
+            np.reshape([self.init_val] * 10, [10, 1]),
+            np.reshape([1, 17, 33, 49] + [self.init_val] * 6, [10, 1]),
+            np.reshape([self.init_val] * 10, [10, 1]),
+        ],
+        axis=1)
+
+    # The new weight matrix is of size
+    # [5 feature vocab + 5 feature OOV, 4 class vocab + 1 class OOV].  The
+    # second partition has only OOV.
+    remapped_matrix = variable_scope.get_variable(
+        name='linear_all_oov/obtained_weight_matrix',
+        shape=[10, 5],
+        initializer=loading_initializer,
+        partitioner=partitioned_variables.fixed_size_partitioner(2))
+
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      self.assertAllClose(expected_remapped_matrix,
+                          remapped_matrix.as_tensor().eval())
+
+  def test_load_and_remap_linear_multiclass_initializer_default_init(self):
+    """Tests where the zeros_initializer default is used for linear."""
+    loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer(
+        new_row_vocab_size=5,
+        new_col_vocab_file=self.new_class_vocab_file,
+        old_col_vocab_file=self.old_class_vocab_file,
+        new_col_vocab_size=4,
+        old_tensor_name='some_scope/embeddings',
+        ckpt_path=[self.checkpoint_file],
+        new_row_vocab_file=self.new_feature_vocab_file,
+        old_row_vocab_file=self.old_feature_vocab_file,
+        num_row_oov_buckets=1,
+        num_col_oov_buckets=1))
+
+    expected_remapped_matrix = np.concatenate(
+        [
+            np.reshape([2, 18, 34, 50, 0, 0], [6, 1]),
+            np.reshape([0, 16, 32, 48, 0, 0], [6, 1]),
+            np.reshape([0] * 6, [6, 1]),
+            np.reshape([1, 17, 33, 49, 0, 0], [6, 1]),
+            np.reshape([0] * 6, [6, 1])
+        ],
+        axis=1)
+
+    remapped_matrix = variable_scope.get_variable(
+        name='linear_init_fallback/obtained_weight_matrix',
+        shape=[6, 5],
+        initializer=loading_initializer,
+        partitioner=partitioned_variables.fixed_size_partitioner(2))
+
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      self.assertAllClose(expected_remapped_matrix,
+                          remapped_matrix.as_tensor().eval())
+
+  def test_load_embedding_initializer(self):
+    """Tests for the load_embedding_initializer wrapper."""
+    embedding_loading_initializer = (checkpoint_ops._load_embedding_initializer(
+        new_vocab_file=self.new_feature_vocab_file,
+        old_vocab_file=self.old_feature_vocab_file,
+        new_vocab_size=5,
+        embedding_dim=16,
+        embedding_tensor_name='some_scope/embeddings',
+        ckpt_path=[self.checkpoint_file],
+        num_oov_buckets=1,
+        initializer=self.initializer))
+
+    expected_remapped_embeddings = np.concatenate(
+        [
+            np.reshape(range(64), [4, 16]),
+            np.reshape([self.init_val] * 32, [2, 16]),
+        ],
+        axis=0)
+
+    # The new weight matrix is of size
+    # [5 feature vocab + 1 feature OOV, 16 (embedding dimension)], where the
+    # last vocab row (2nd last row) is newly initialized (wasn't found in
+    # previous vocab) and the actual last row is OOV and also newly initialized.
+    # Use a partitioned variable to confirm that the offset logic works.
+    remapped_embeddings = variable_scope.get_variable(
+        name='embedding/obtained_embedding_matrix',
+        shape=[6, 16],
+        initializer=embedding_loading_initializer,
+        partitioner=partitioned_variables.fixed_size_partitioner(2))
+
+    with self.test_session():
+      variables.global_variables_initializer().run()
+      self.assertAllClose(expected_remapped_embeddings,
+                          remapped_embeddings.as_tensor().eval())
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From 4aa83760c636085349da57e462acabedc8725937 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 16:44:47 -0700
Subject: [PATCH 130/294] Don't try to evaluate a Tensor as a boolean.

PiperOrigin-RevId: 167069142
---
 tensorflow/python/layers/convolutional.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py
index e46c0b2954..41c67743b6 100644
--- a/tensorflow/python/layers/convolutional.py
+++ b/tensorflow/python/layers/convolutional.py
@@ -172,7 +172,7 @@ class _Conv(base.Layer):
         padding=self.padding.upper(),
         data_format=utils.convert_data_format(self.data_format, self.rank + 2))
 
-    if self.bias is not None:
+    if self.use_bias:
       if self.data_format == 'channels_first':
         if self.rank == 1:
           # nn.bias_add does not accept a 1D input tensor.
@@ -989,7 +989,7 @@ class SeparableConv2D(Conv2D):
         rate=self.dilation_rate,
         data_format=utils.convert_data_format(self.data_format, ndim=4))
 
-    if self.bias is not None:
+    if self.use_bias:
       outputs = nn.bias_add(
           outputs,
           self.bias,
@@ -1308,7 +1308,7 @@ class Conv2DTranspose(Conv2D):
                                                      stride_w)
       outputs.set_shape(out_shape)
 
-    if self.bias:
+    if self.use_bias:
       outputs = nn.bias_add(
           outputs,
           self.bias,
@@ -1611,7 +1611,7 @@ class Conv3DTranspose(Conv3D):
                                                      stride_w)
       outputs.set_shape(out_shape)
 
-    if self.bias:
+    if self.use_bias:
       outputs_shape = outputs.shape.as_list()
       if self.data_format == 'channels_first':
         outputs_4d = array_ops.reshape(outputs, [
-- 
GitLab


From d22ed610a0c7395a0e3ea8349b0da0e8afa62fe1 Mon Sep 17 00:00:00 2001
From: Ali Yahya 
Date: Wed, 30 Aug 2017 17:29:54 -0700
Subject: [PATCH 131/294] Adds tape.watch_variable(v) where v is any
 ResourceVariable.

PiperOrigin-RevId: 167074863
---
 tensorflow/python/eager/backprop_test.py | 2 +-
 tensorflow/python/eager/tape.py          | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 010124ed56..429eeabb42 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -85,7 +85,7 @@ class BackpropTest(test.TestCase):
         initial_value=tensor.Tensor(1.0), name='x')
 
     def fn():
-      tape.watch(x.handle)
+      tape.watch_variable(x)
       b = tensor.Tensor(2.0)
       c = math_ops.add(x.value(), b)
       return math_ops.add(c, tensor.Tensor(3.0))
diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py
index 4d09db73c9..9cd29f630d 100644
--- a/tensorflow/python/eager/tape.py
+++ b/tensorflow/python/eager/tape.py
@@ -151,6 +151,15 @@ def watch(tensor):
   return tensor
 
 
+def watch_variable(resource_variable):
+  """Marks this ResourceVariable to be watched by all tapes in the stack.
+
+  Args:
+    resource_variable: A ResourceVariable to be watched.
+  """
+  watch(resource_variable.handle)  # py-lint: disable=protected-access
+
+
 def pop_tape():
   """Pops the top tape in the stack, if any."""
   if _tape_stack.stack:
-- 
GitLab


From 4807158b3df7fdfd1a597c581e7ba9f894b1b256 Mon Sep 17 00:00:00 2001
From: James Qin 
Date: Wed, 30 Aug 2017 17:36:00 -0700
Subject: [PATCH 132/294] Add functional cudnn_rnn_ops

Will deprecate class-style cudnn_rnn_ops soon. As a transition
plan, have the former depend on the latter for now.

PiperOrigin-RevId: 167075637
---
 .../cudnn_rnn/python/ops/cudnn_rnn_ops.py     | 555 ++++++++++++++++--
 1 file changed, 509 insertions(+), 46 deletions(-)

diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
index 694bd507d9..bc4fd10cac 100644
--- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
+++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
@@ -716,6 +716,482 @@ _cudnn_rnn_common_doc_string = """
 """
 
 
+def _check_direction(direction):
+  if direction not in (CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION):
+    raise ValueError("Invalid direction: %s, expect %s or %s" %
+                     (direction, CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION))
+
+
+def _check_rnn_mode(rnn_mode):
+  if rnn_mode not in (CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_TANH, CUDNN_RNN_RELU):
+    raise ValueError("Invalid rnn_mode: %s, expect one of (%s, %s, %s, %s)" %
+                     (rnn_mode, CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_TANH,
+                      CUDNN_RNN_RELU))
+
+
+def _get_seed(seed):
+  seed, seed2 = random_seed.get_seed(seed)
+  if seed is None and seed2 is None:
+    seed, seed2 = 0, 0
+  return seed, seed2
+
+
+def _get_num_params(rnn_mode, num_layers, direction):
+  """Return num params for given Cudnn config."""
+  if rnn_mode == CUDNN_LSTM:
+    num_params_per_layer = 8
+  elif rnn_mode == CUDNN_GRU:
+    num_params_per_layer = 6
+  elif rnn_mode in (CUDNN_RNN_RELU, CUDNN_RNN_TANH):
+    num_params_per_layer = 2
+  else:
+    raise ValueError("Invalid \'rnn_mode\': %s", rnn_mode)
+  num_params = num_layers * num_params_per_layer
+  if direction != CUDNN_RNN_UNIDIRECTION:
+    num_params *= 2
+  return num_params
+
+
+def _cudnn_rnn(inputs,
+               input_h,
+               input_c,
+               params,
+               is_training,
+               rnn_mode,
+               input_mode=CUDNN_INPUT_LINEAR_MODE,
+               direction=CUDNN_RNN_UNIDIRECTION,
+               dropout=0.,
+               seed=0,
+               name=None):
+  """Cudnn RNN.
+
+  Args:
+    inputs: the input sequence to the RNN model. A Tensor of shape [?,
+      batch_size, input_size].
+    input_h: the initial hidden state for h. A Tensor of shape [num_layers,
+      batch_size, num_units].
+    input_c: the initial hidden state for c. This is only relevant for LSTM.
+      A Tensor of the same shape as input_h.
+    params: the parameter buffer created for this model.
+    is_training: whether this operation will be used in training or inference
+    rnn_mode: one of ('lstm', 'gru', 'rnn_relu', 'rnn_tanh').
+    input_mode: indicate whether there is a linear projection between the
+      input and the actual computation before the first layer. It could be
+      'linear_input', 'skip_input' or 'auto_select'.
+      'linear_input' (default) always applies a linear projection of input
+      onto RNN hidden state. (standard RNN behavior).
+      'skip_input' is only allowed when input_size == num_units;
+      'auto_select' implies 'skip_input' when input_size == num_units;
+      otherwise, it implies 'linear_input'.
+    direction: the direction model that the model operates. Could be either
+        'unidirectional' or 'bidirectional'
+    dropout: whether to enable dropout. With it is 0, dropout is disabled.
+    seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
+        for behavior.
+    name: name of the operation.
+  Returns:
+    outputs, output_h, output_c
+  """
+  _check_rnn_mode(rnn_mode)
+  _check_direction(direction)
+  seed, seed2 = random_seed.get_seed(seed)
+  outputs, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn(
+      input=inputs,
+      input_h=input_h,
+      input_c=input_c,
+      params=params,
+      is_training=is_training,
+      rnn_mode=rnn_mode,
+      input_mode=input_mode,
+      direction=direction,
+      dropout=dropout,
+      seed=seed,
+      seed2=seed2,
+      name=name)
+  return (outputs, output_h, output_c)
+
+
+def cudnn_lstm(inputs,
+               input_h,
+               input_c,
+               params,
+               is_training,
+               input_mode=CUDNN_INPUT_LINEAR_MODE,
+               direction=CUDNN_RNN_UNIDIRECTION,
+               dropout=0.,
+               seed=0,
+               name=None):
+  """Cudnn LSTM.
+
+  Args:
+    inputs: the input sequence to the RNN model. A Tensor of shape [?,
+      batch_size, input_size].
+    input_h: the initial hidden state for h. A Tensor of shape [num_layers,
+      batch_size, num_units].
+    input_c: the initial hidden state for c. This is only relevant for LSTM.
+      A Tensor of the same shape as input_h.
+    params: the parameter buffer created for this model.
+    is_training: whether this operation will be used in training or inference
+      input_mode: indicate whether there is a linear projection between the
+        input and the actual computation before the first layer. It could be
+        'linear_input', 'skip_input' or 'auto_select'.
+        'linear_input' (default) always applies a linear projection of input
+        onto RNN hidden state. (standard RNN behavior).
+        'skip_input' is only allowed when input_size == num_units;
+        'auto_select' implies 'skip_input' when input_size == num_units;
+        otherwise, it implies 'linear_input'.
+    direction: the direction model that the model operates. Could be either
+        'unidirectional' or 'bidirectional'
+    dropout: whether to enable dropout. With it is 0, dropout is disabled.
+    seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
+        for behavior.
+    name: name of the operation.
+  Returns:
+    outputs, output_h, output_c
+  """
+  return _cudnn_rnn(inputs, input_h, input_c, params, is_training, CUDNN_LSTM,
+                    input_mode, direction, dropout, seed, name)
+
+
+def _cudnn_rnn_no_input_c(inputs,
+                          input_h,
+                          params,
+                          is_training,
+                          rnn_mode,
+                          input_mode=CUDNN_INPUT_LINEAR_MODE,
+                          direction=CUDNN_RNN_UNIDIRECTION,
+                          dropout=0.,
+                          seed=0,
+                          name=None):
+  """Cudnn RNN w/o input_c.
+
+  Args:
+    inputs: the input sequence to the RNN model. A Tensor of shape [?,
+      batch_size, input_size].
+    input_h: the initial hidden state for h. A Tensor of shape [num_layers,
+      batch_size, num_units].
+    params: the parameter buffer created for this model.
+    is_training: whether this operation will be used in training or inference
+    rnn_mode: one of ('lstm', 'gru', 'rnn_relu', 'rnn_tanh').
+    input_mode: indicate whether there is a linear projection between the
+      input and the actual computation before the first layer. It could be
+      'linear_input', 'skip_input' or 'auto_select'.
+      'linear_input' (default) always applies a linear projection of input
+      onto RNN hidden state. (standard RNN behavior).
+      'skip_input' is only allowed when input_size == num_units;
+      'auto_select' implies 'skip_input' when input_size == num_units;
+      otherwise, it implies 'linear_input'.
+    direction: the direction model that the model operates. Could be either
+        'unidirectional' or 'bidirectional'
+    dropout: whether to enable dropout. With it is 0, dropout is disabled.
+    seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
+        for behavior.
+    name: name of the operation.
+  Returns:
+    outputs, output_h
+  """
+  input_c = array_ops.constant([], dtype=input_h.dtype)
+  outputs, output_h, _ = _cudnn_rnn(inputs, input_h, input_c, params,
+                                    is_training, rnn_mode, input_mode,
+                                    direction, dropout, seed, name)
+  return outputs, output_h
+
+
+def cudnn_gru(inputs,
+              input_h,
+              params,
+              is_training,
+              input_mode=CUDNN_INPUT_LINEAR_MODE,
+              direction=CUDNN_RNN_UNIDIRECTION,
+              dropout=0.,
+              seed=0,
+              name=None):
+  """Cudnn GRU.
+
+  Args:
+    inputs: the input sequence to the RNN model. A Tensor of shape [?,
+      batch_size, input_size].
+    input_h: the initial hidden state for h. A Tensor of shape [num_layers,
+      batch_size, num_units].
+    params: the parameter buffer created for this model.
+    is_training: whether this operation will be used in training or inference
+      input_mode: indicate whether there is a linear projection between the
+        input and the actual computation before the first layer. It could be
+        'linear_input', 'skip_input' or 'auto_select'.
+        'linear_input' (default) always applies a linear projection of input
+        onto RNN hidden state. (standard RNN behavior).
+        'skip_input' is only allowed when input_size == num_units;
+        'auto_select' implies 'skip_input' when input_size == num_units;
+        otherwise, it implies 'linear_input'.
+    direction: the direction model that the model operates. Could be either
+        'unidirectional' or 'bidirectional'
+    dropout: whether to enable dropout. With it is 0, dropout is disabled.
+    seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
+        for behavior.
+    name: name of the operation.
+  Returns:
+    outputs, output_h
+  """
+  return _cudnn_rnn_no_input_c(inputs, input_h, params, is_training, CUDNN_GRU,
+                               input_mode, direction, dropout, seed, name)
+
+
+def cudnn_rnn_relu(inputs,
+                   input_h,
+                   params,
+                   is_training,
+                   input_mode=CUDNN_INPUT_LINEAR_MODE,
+                   direction=CUDNN_RNN_UNIDIRECTION,
+                   dropout=0.,
+                   seed=0,
+                   name=None):
+  """Cudnn RNN Relu.
+
+  Args:
+    inputs: the input sequence to the RNN model. A Tensor of shape [?,
+      batch_size, input_size].
+    input_h: the initial hidden state for h. A Tensor of shape [num_layers,
+      batch_size, num_units].
+    params: the parameter buffer created for this model.
+    is_training: whether this operation will be used in training or inference
+      input_mode: indicate whether there is a linear projection between the
+        input and the actual computation before the first layer. It could be
+        'linear_input', 'skip_input' or 'auto_select'.
+        'linear_input' (default) always applies a linear projection of input
+        onto RNN hidden state. (standard RNN behavior).
+        'skip_input' is only allowed when input_size == num_units;
+        'auto_select' implies 'skip_input' when input_size == num_units;
+        otherwise, it implies 'linear_input'.
+    direction: the direction model that the model operates. Could be either
+        'unidirectional' or 'bidirectional'
+    dropout: whether to enable dropout. With it is 0, dropout is disabled.
+    seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
+        for behavior.
+    name: name of the operation.
+  Returns:
+    outputs, output_h
+  """
+  return _cudnn_rnn_no_input_c(inputs, input_h, params, is_training,
+                               CUDNN_RNN_RELU, input_mode, direction, dropout,
+                               seed, name)
+
+
+def cudnn_rnn_tanh(inputs,
+                   input_h,
+                   params,
+                   is_training,
+                   input_mode=CUDNN_INPUT_LINEAR_MODE,
+                   direction=CUDNN_RNN_UNIDIRECTION,
+                   dropout=0.,
+                   seed=0,
+                   name=None):
+  """Cudnn RNN Tanh.
+
+  Args:
+    inputs: the input sequence to the RNN model. A Tensor of shape [?,
+      batch_size, input_size].
+    input_h: the initial hidden state for h. A Tensor of shape [num_layers,
+      batch_size, num_units].
+    params: the parameter buffer created for this model.
+    is_training: whether this operation will be used in training or inference
+      input_mode: indicate whether there is a linear projection between the
+        input and the actual computation before the first layer. It could be
+        'linear_input', 'skip_input' or 'auto_select'.
+        'linear_input' (default) always applies a linear projection of input
+        onto RNN hidden state. (standard RNN behavior).
+        'skip_input' is only allowed when input_size == num_units;
+        'auto_select' implies 'skip_input' when input_size == num_units;
+        otherwise, it implies 'linear_input'.
+    direction: the direction model that the model operates. Could be either
+        'unidirectional' or 'bidirectional'
+    dropout: whether to enable dropout. With it is 0, dropout is disabled.
+    seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
+        for behavior.
+    name: name of the operation.
+  Returns:
+    outputs, output_h
+  """
+  return _cudnn_rnn_no_input_c(inputs, input_h, params, is_training,
+                               CUDNN_RNN_TANH, input_mode, direction, dropout,
+                               seed, name)
+
+
+def cudnn_rnn_params_to_canonical(rnn_mode,
+                                  num_layers,
+                                  num_units,
+                                  input_size,
+                                  params,
+                                  input_mode=CUDNN_INPUT_LINEAR_MODE,
+                                  direction=CUDNN_RNN_UNIDIRECTION,
+                                  dropout=0,
+                                  seed=0,
+                                  name=None):
+  """Convert cudnn opaque params to canonical.
+
+  Args:
+    rnn_mode: a string specifies the mode, under which this RNN model runs.
+        Could be either 'lstm', 'gru', 'rnn_tanh' or 'rnn_relu'.
+    num_layers: the number of layers for the RNN model.
+    num_units: the number of units within the RNN model.
+    input_size: the size of the input, it could be different from the
+        num_units.
+    params: opaque cudnn params var.
+    input_mode: indicate whether there is a linear projection between the
+        input and the actual computation before the first layer. It could be
+        'linear_input', 'skip_input' or 'auto_select'.
+        'linear_input' (default) always applies a linear projection of input
+        onto RNN hidden state. (standard RNN behavior).
+        'skip_input' is only allowed when input_size == num_units;
+        'auto_select' implies 'skip_input' when input_size == num_units;
+        otherwise, it implies 'linear_input'.
+    direction: the direction model that the model operates. Could be either
+        'unidirectional' or 'bidirectional'
+    dropout: whether to enable dropout. With it is 0, dropout is disabled.
+    seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
+        for behavior.
+    name: name of the operation.
+  Returns:
+    weights list and bias list
+  Raises:
+    ValueError: if rnn_mode or direction is invalid.
+  """
+
+  _check_rnn_mode(rnn_mode)
+  _check_direction(direction)
+  num_params = _get_num_params(rnn_mode, num_layers, direction)
+  seed, seed2 = random_seed.get_seed(seed)
+  weights, biases = gen_cudnn_rnn_ops.cudnn_rnn_params_to_canonical(
+      rnn_mode=rnn_mode,
+      num_layers=num_layers,
+      num_units=num_units,
+      input_size=input_size,
+      params=params,
+      input_mode=input_mode,
+      direction=direction,
+      dropout=dropout,
+      seed=seed,
+      seed2=seed2,
+      num_params=num_params,
+      name=name)
+  return weights, biases
+
+
+def cudnn_rnn_canonical_to_params(rnn_mode,
+                                  num_layers,
+                                  num_units,
+                                  input_size,
+                                  weights,
+                                  biases,
+                                  input_mode=CUDNN_INPUT_LINEAR_MODE,
+                                  direction=CUDNN_RNN_UNIDIRECTION,
+                                  dropout=0,
+                                  seed=0,
+                                  name=None):
+  """Converts params from the canonical format to a specific format of cuDNN.
+
+  Args:
+    rnn_mode: a string specifies the mode, under which this RNN model runs.
+        Could be either 'lstm', 'gru', 'rnn_tanh' or 'rnn_relu'.
+    num_layers: the number of layers for the RNN model.
+    num_units: the number of units within the RNN model.
+    input_size: the size of the input, it could be different from the
+        num_units.
+    weights: a Tensor for weight parameters.
+    biases: a Tensor for bias parameters.
+    input_mode: indicate whether there is a linear projection between the
+        input and the actual computation before the first layer. It could be
+        'linear_input', 'skip_input' or 'auto_select'.
+        'linear_input' (default) always applies a linear projection of input
+        onto RNN hidden state. (standard RNN behavior).
+        'skip_input' is only allowed when input_size == num_units;
+        'auto_select' implies 'skip_input' when input_size == num_units;
+        otherwise, it implies 'linear_input'.
+    direction: the direction model that the model operates. Could be either
+        'unidirectional' or 'bidirectional'
+    dropout: whether to enable dropout. With it is 0, dropout is disabled.
+    seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
+        for behavior.
+    name: name of the operation.
+  Returns:
+    an opaque Cudnn param.
+  Raises:
+    ValueError: if rnn_mode or direction is invalid.
+  """
+  _check_rnn_mode(rnn_mode)
+  _check_direction(direction)
+  seed, seed2 = random_seed.get_seed(seed)
+  return gen_cudnn_rnn_ops.cudnn_rnn_canonical_to_params(
+      rnn_mode=rnn_mode,
+      num_layers=num_layers,
+      num_units=num_units,
+      input_size=input_size,
+      weights=weights,
+      biases=biases,
+      input_mode=input_mode,
+      direction=direction,
+      dropout=dropout,
+      seed=seed,
+      seed2=seed2,
+      name=name)
+
+
+def cudnn_opaque_params_size(rnn_mode,
+                             num_layers,
+                             num_units,
+                             input_size,
+                             input_mode=CUDNN_INPUT_LINEAR_MODE,
+                             direction=CUDNN_RNN_UNIDIRECTION,
+                             dtype=dtypes.float32,
+                             dropout=0,
+                             seed=0,
+                             name=None):
+  """Returns opaque params size for specific Cudnn config.
+
+  Args:
+    rnn_mode: a string specifies the mode, under which this RNN model runs.
+        Could be either 'lstm', 'gru', 'rnn_tanh' or 'rnn_relu'.
+    num_layers: the number of layers for the RNN model.
+    num_units: the number of units within the RNN model.
+    input_size: the size of the input, it could be different from the
+        num_units.
+    input_mode: indicate whether there is a linear projection between the
+        input and the actual computation before the first layer. It could be
+        'linear_input', 'skip_input' or 'auto_select'.
+        'linear_input' (default) always applies a linear projection of input
+        onto RNN hidden state. (standard RNN behavior).
+        'skip_input' is only allowed when input_size == num_units;
+        'auto_select' implies 'skip_input' when input_size == num_units;
+        otherwise, it implies 'linear_input'.
+    direction: the direction model that the model operates. Could be either
+        'unidirectional' or 'bidirectional'
+    dtype: one of tf.float32 or tf.float64.
+    dropout: whether to enable dropout. With it is 0, dropout is disabled.
+    seed: the op seed used for initializing dropout. See @{tf.set_random_seed}
+        for behavior.
+    name: name of the operation.
+  Returns:
+    a int, size of Cudnn opaque params.
+  Raises:
+    ValueError: if rnn_mode or direction is invalid.
+  """
+  _check_rnn_mode(rnn_mode)
+  _check_direction(direction)
+  seed, seed2 = random_seed.get_seed(seed)
+  return gen_cudnn_rnn_ops.cudnn_rnn_params_size(
+      rnn_mode=rnn_mode,
+      num_layers=num_layers,
+      num_units=num_units,
+      input_size=input_size,
+      T=dtype,
+      S=dtypes.int32,
+      dropout=dropout,
+      seed=seed,
+      seed2=seed2,
+      input_mode=input_mode,
+      direction=direction,
+      name=name)[0]
+
+
 class _CudnnRNN(object):
   """Creates an RNN model using the underlying Cudnn implementation.
 
@@ -761,9 +1237,6 @@ class _CudnnRNN(object):
     Raises:
       ValueError: if direction is invalid.
     """
-    if direction not in (CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION):
-      raise ValueError("Invalid direction: %s, expect %s or %s",
-                       direction, CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION)
     self._num_layers = num_layers
     self._num_units = num_units
     self._input_size = input_size
@@ -772,10 +1245,7 @@ class _CudnnRNN(object):
     self._direction = direction
     self._dtype = dtype
     self._dropout = dropout
-    # get graph and op seed.
-    self._seed, self._seed2 = random_seed.get_seed(seed)
-    if self._seed is None and self._seed2 is None:
-      self._seed, self._seed2 = 0, 0
+    self._seed = seed
 
   @property
   def input_mode(self):
@@ -807,18 +1277,16 @@ class _CudnnRNN(object):
     Returns:
       The calculated parameter buffer size.
     """
-    return gen_cudnn_rnn_ops.cudnn_rnn_params_size(
+    return cudnn_opaque_params_size(
+        rnn_mode=self._rnn_mode,
         num_layers=self._num_layers,
         num_units=self._num_units,
         input_size=self._input_size,
-        T=self._dtype,
-        S=dtypes.int32,
+        dtype=self._dtype,
         dropout=self._dropout,
         seed=self._seed,
-        seed2=self._seed2,
-        rnn_mode=self._rnn_mode,
         input_mode=self._input_mode,
-        direction=self._direction)[0]
+        direction=self._direction)
 
   def __call__(self, input_data, input_h, input_c, params, is_training=True):
     """Runs the forward step for the RNN model.
@@ -837,22 +1305,17 @@ class _CudnnRNN(object):
       output_h: the final state for h.
       output_c: the final state for c. This is only relevant for LSTM.
     """
-    if self._rnn_mode != CUDNN_LSTM:
-      # For model that doesn't take input_c, replace with a dummy tensor.
-      input_c = array_ops.constant([], dtype=self._dtype)
-    output, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn(
-        input=input_data,
-        input_h=input_h,
-        input_c=input_c,
-        params=params,
-        rnn_mode=self._rnn_mode,
+    return _cudnn_rnn(
+        input_data,
+        input_h,
+        input_c,
+        params,
+        is_training,
+        self._rnn_mode,
         input_mode=self._input_mode,
         direction=self._direction,
         dropout=self._dropout,
-        seed=self._seed,
-        seed2=self._seed2,
-        is_training=is_training)
-    return (output, output_h, output_c)
+        seed=self._seed)
 
   def params_to_canonical(self, params):
     """Converts params from a specific format of cuDNN to the canonical format.
@@ -863,22 +1326,16 @@ class _CudnnRNN(object):
     Returns:
       A function for the specific-to-canonical conversion.
     """
-    num_params = self._num_layers * self._NUM_PARAMS_PER_LAYER
-    if self._direction != CUDNN_RNN_UNIDIRECTION:
-      num_params *= 2
-    weights, biases = gen_cudnn_rnn_ops.cudnn_rnn_params_to_canonical(
+    return cudnn_rnn_params_to_canonical(
+        rnn_mode=self._rnn_mode,
         num_layers=self._num_layers,
         num_units=self._num_units,
         input_size=self._input_size,
         params=params,
-        dropout=self._dropout,
-        seed=self._seed,
-        seed2=self._seed2,
-        num_params=num_params,
-        rnn_mode=self._rnn_mode,
         input_mode=self._input_mode,
-        direction=self._direction)
-    return weights, biases
+        direction=self._direction,
+        dropout=self._dropout,
+        seed=self._seed)
 
   def canonical_to_params(self, weights, biases):
     """Converts params from the canonical format to a specific format of cuDNN.
@@ -890,18 +1347,17 @@ class _CudnnRNN(object):
     Returns:
       A function for the canonical-to-params-to-specific conversion..
     """
-    return gen_cudnn_rnn_ops.cudnn_rnn_canonical_to_params(
+    return cudnn_rnn_canonical_to_params(
+        rnn_mode=self._rnn_mode,
         num_layers=self._num_layers,
         num_units=self._num_units,
         input_size=self._input_size,
         weights=weights,
         biases=biases,
-        dropout=self._dropout,
-        seed=self._seed,
-        seed2=self._seed2,
-        rnn_mode=self._rnn_mode,
         input_mode=self._input_mode,
-        direction=self._direction)
+        direction=self._direction,
+        dropout=self._dropout,
+        seed=self._seed)
 
 
 class CudnnLSTM(_CudnnRNN):
@@ -1036,9 +1492,16 @@ class _CudnnRNNNoInputC(_CudnnRNN):
       output: the output sequuence.
       output_h: the final state for h.
     """
-    output, output_h, _ = super(_CudnnRNNNoInputC, self).__call__(
-        input_data, input_h, None, params, is_training=is_training)
-    return (output, output_h)
+    return _cudnn_rnn_no_input_c(
+        input_data,
+        input_h,
+        params,
+        is_training,
+        self._rnn_mode,
+        input_mode=self._input_mode,
+        direction=self._direction,
+        dropout=self._dropout,
+        seed=self._seed)
 
 
 class CudnnGRU(_CudnnRNNNoInputC):
-- 
GitLab


From 62742c14779062dc105b098142fff2c88c3a75a3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 17:55:02 -0700
Subject: [PATCH 133/294] Add code to extract feature importance.

PiperOrigin-RevId: 167077754
---
 .../estimator_batch/custom_export_strategy.py | 67 +++++++++++++++++--
 .../custom_export_strategy_test.py            | 18 ++++-
 2 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
index c377c50e9f..a8b60460c8 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py
@@ -18,6 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
+import os
+
 from tensorflow.contrib.boosted_trees.proto import tree_config_pb2
 from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch
 from tensorflow.contrib.decision_trees.proto import generic_tree_model_extensions_pb2
@@ -26,18 +29,21 @@ from tensorflow.contrib.learn.python.learn import export_strategy
 from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils
 from tensorflow.python.client import session as tf_session
 from tensorflow.python.framework import ops
+from tensorflow.python.platform import gfile
 from tensorflow.python.saved_model import loader as saved_model_loader
 from tensorflow.python.saved_model import tag_constants
 
 
-def make_custom_export_strategy(name, convert_fn, feature_columns,
+def make_custom_export_strategy(name,
+                                convert_fn,
+                                feature_columns,
                                 export_input_fn):
   """Makes custom exporter of GTFlow tree format.
 
   Args:
     name: A string, for the name of the export strategy.
     convert_fn: A function that converts the tree proto to desired format and
-      saves it to the desired location.
+      saves it to the desired location. Can be None to skip conversion.
     feature_columns: A list of feature columns.
     export_input_fn: A function that takes no arguments and returns an
       `InputFnOps`.
@@ -68,9 +74,22 @@ def make_custom_export_strategy(name, convert_fn, feature_columns,
         dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
         dtec.ParseFromString(dfec_str)
         # Export the result in the same folder as the saved model.
-        convert_fn(dtec, sorted_feature_names, len(dense_floats),
-                   len(sparse_float_indices), len(sparse_int_indices),
-                   result_dir, eval_result)
+        if convert_fn:
+          convert_fn(dtec, sorted_feature_names,
+                     len(dense_floats),
+                     len(sparse_float_indices),
+                     len(sparse_int_indices), result_dir, eval_result)
+        feature_importances = _get_feature_importances(
+            dtec, sorted_feature_names,
+            len(dense_floats),
+            len(sparse_float_indices), len(sparse_int_indices))
+        sorted_by_importance = sorted(
+            feature_importances.items(), key=lambda x: -x[1])
+        assets_dir = os.path.join(result_dir, "assets.extra")
+        gfile.MakeDirs(assets_dir)
+        with gfile.GFile(os.path.join(assets_dir, "feature_importances"),
+                         "w") as f:
+          f.write("\n".join("%s, %f" % (k, v) for k, v in sorted_by_importance))
     return result_dir
   return export_strategy.ExportStrategy(name, export_fn)
 
@@ -157,3 +176,41 @@ def convert_to_universal_format(dtec, sorted_feature_names,
         node.left_child_id.value = split.left_id
         node.right_child_id.value = split.right_id
   return model_and_features
+
+
+def _get_feature_importances(dtec, feature_names, num_dense_floats,
+                             num_sparse_float, num_sparse_int):
+  """Export the feature importance per feature column."""
+  del num_sparse_int    # Unused.
+  sums = collections.defaultdict(lambda: 0)
+  for tree_idx in range(len(dtec.trees)):
+    tree = dtec.trees[tree_idx]
+    for tree_node in tree.nodes:
+      node_type = tree_node.WhichOneof("node")
+      if node_type == "dense_float_binary_split":
+        split = tree_node.dense_float_binary_split
+        split_column = feature_names[split.feature_column]
+      elif node_type == "sparse_float_binary_split_default_left":
+        split = tree_node.sparse_float_binary_split_default_left.split
+        split_column = feature_names[split.feature_column + num_dense_floats]
+      elif node_type == "sparse_float_binary_split_default_right":
+        split = tree_node.sparse_float_binary_split_default_right.split
+        split_column = feature_names[split.feature_column + num_dense_floats]
+      elif node_type == "categorical_id_binary_split":
+        split = tree_node.categorical_id_binary_split
+        split_column = feature_names[split.feature_column + num_dense_floats +
+                                     num_sparse_float]
+      elif node_type == "categorical_id_set_membership_binary_split":
+        split = tree_node.categorical_id_set_membership_binary_split
+        split_column = feature_names[split.feature_column + num_dense_floats +
+                                     num_sparse_float]
+      elif node_type == "leaf":
+        assert tree_node.node_metadata.gain == 0
+        continue
+      else:
+        raise ValueError("Unexpected split type %s", node_type)
+      # Apply shrinkage factor. It is important since it is not always uniform
+      # across different trees.
+      sums[split_column] += (
+          tree_node.node_metadata.gain * dtec.tree_weights[tree_idx])
+  return dict(sums)
diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py
index 8d801fa1f3..4ed18b2d34 100644
--- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py
+++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py
@@ -27,7 +27,7 @@ from tensorflow.python.platform import googletest
 
 class ConvertModelTest(test_util.TensorFlowTestCase):
 
-  def testConvertModel(self):
+  def _make_trees(self):
     dtec_str = """
     trees {
       nodes {
@@ -108,8 +108,12 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
     """
     dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
     text_format.Merge(dtec_str, dtec)
-    # The feature columns in the order they were added.
     feature_columns = ["feature_b", "feature_a", "feature_d"]
+    return dtec, feature_columns
+
+  def testConvertModel(self):
+    dtec, feature_columns = self._make_trees()
+    # The feature columns in the order they were added.
     out = custom_export_strategy.convert_to_universal_format(
         dtec, feature_columns, 1, 1,
         1)
@@ -273,6 +277,16 @@ class ConvertModelTest(test_util.TensorFlowTestCase):
     }"""
     self.assertProtoEquals(expected_tree, out)
 
+  def testFeatureImportance(self):
+    dtec, feature_columns = self._make_trees()
+    feature_importances = custom_export_strategy._get_feature_importances(
+        dtec, feature_columns, 1, 1, 1)
+    self.assertItemsEqual(["feature_b", "feature_a", "feature_d"],
+                          feature_importances.keys())
+    self.assertAlmostEqual(50.0, feature_importances["feature_b"], places=4)
+    self.assertAlmostEqual(50.0, feature_importances["feature_a"], places=4)
+    self.assertAlmostEqual(50.0, feature_importances["feature_d"], places=4)
+
 
 if __name__ == "__main__":
   googletest.main()
-- 
GitLab


From 87c7e2c515d8ed032801483459cdc4528aea540d Mon Sep 17 00:00:00 2001
From: Shanqing Cai 
Date: Wed, 30 Aug 2017 18:15:01 -0700
Subject: [PATCH 134/294] TFE: Simplify tfe.Tensor.__str__() output

PiperOrigin-RevId: 167079964
---
 tensorflow/python/eager/tensor_test.py | 14 +++++++-------
 tensorflow/python/framework/ops.py     |  8 ++++----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py
index 8d0f639ddc..bd8e653b97 100644
--- a/tensorflow/python/eager/tensor_test.py
+++ b/tensorflow/python/eager/tensor_test.py
@@ -77,8 +77,8 @@ class TFETensorTest(test_util.TensorFlowTestCase):
   def testMultiLineTensorStr(self):
     t = tensor.Tensor(np.eye(3))
     tensor_str = str(t)
-    self.assertIn("shape=%s, dtype=%s, " % (t.shape, t.dtype.name), tensor_str)
-    self.assertIn("numpy=\n%s" % t.numpy(), tensor_str)
+    self.assertIn("shape=%s, dtype=%s" % (t.shape, t.dtype.name), tensor_str)
+    self.assertIn(str(t.numpy()), tensor_str)
 
   def testMultiLineTensorRepr(self):
     t = tensor.Tensor(np.eye(3))
@@ -95,7 +95,7 @@ class TFETensorTest(test_util.TensorFlowTestCase):
     np.set_printoptions(threshold=2, edgeitems=1)
 
     t = tensor.Tensor(np.arange(10, dtype=np.int32))
-    self.assertIn("numpy=[0 ..., 9]", str(t))
+    self.assertIn("[0 ..., 9]", str(t))
     self.assertIn("[0, ..., 9]", repr(t))
 
     # Clean up: reset to previous printoptions.
@@ -103,7 +103,7 @@ class TFETensorTest(test_util.TensorFlowTestCase):
 
   def testZeroDimTensorStr(self):
     t = tensor.Tensor(42)
-    self.assertIn("shape=(), dtype=int32, numpy=42", str(t))
+    self.assertIn("42, shape=(), dtype=int32", str(t))
 
   def testZeroDimTensorRepr(self):
     t = tensor.Tensor(42)
@@ -113,7 +113,7 @@ class TFETensorTest(test_util.TensorFlowTestCase):
 
   def testZeroSizeTensorStr(self):
     t = tensor.Tensor(np.zeros(0, dtype=np.float32))
-    self.assertIn("shape=(0,), dtype=float32, numpy=[]", str(t))
+    self.assertIn("[], shape=(0,), dtype=float32", str(t))
 
   def testZeroSizeTensorRepr(self):
     t = tensor.Tensor(np.zeros(0, dtype=np.float32))
@@ -127,8 +127,8 @@ class TFETensorTest(test_util.TensorFlowTestCase):
     t = tensor.Tensor(42)
     # Force change dtype to a numpy-unprintable type.
     t._dtype = dtypes.resource
-    self.assertIn("numpy=", str(t))
-    self.assertIn("numpy=", repr(t))
+    self.assertIn("", str(t))
+    self.assertIn("", repr(t))
 
   def testStringTensor(self):
     t_np_orig = np.array([[b"a", b"ab"], [b"abc", b"abcd"]])
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index da3757190c..ccaa2141b5 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -721,12 +721,12 @@ class EagerTensor(Tensor):
     return numpy_text
 
   def __str__(self):
-    return "tfe.Tensor(shape=%s, dtype=%s, numpy=%s)" % (self.shape,
-                                                         self.dtype.name,
-                                                         self._numpy_text())
+    return "tf.Tensor(%s, shape=%s, dtype=%s)" % (self._numpy_text(),
+                                                  self.shape,
+                                                  self.dtype.name)
 
   def __repr__(self):
-    return "" % (
+    return "" % (
         self._id, self.shape, self.dtype.name, self._numpy_text(is_repr=True))
 
   @staticmethod
-- 
GitLab


From 5c3977c297f07d1cc14591844e5df202b1994c85 Mon Sep 17 00:00:00 2001
From: Benoit Steiner 
Date: Wed, 30 Aug 2017 18:23:50 -0700
Subject: [PATCH 135/294] Don't use std::pair on GPU since it's not supported
 by nvcc

PiperOrigin-RevId: 167080772
---
 tensorflow/core/kernels/conv_2d.h | 10 +++++-----
 tensorflow/core/kernels/pad_op.cc |  6 +++---
 tensorflow/core/kernels/pad_op.h  |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/kernels/conv_2d.h b/tensorflow/core/kernels/conv_2d.h
index 4bb0b7f3b4..8de8f1b265 100644
--- a/tensorflow/core/kernels/conv_2d.h
+++ b/tensorflow/core/kernels/conv_2d.h
@@ -225,13 +225,13 @@ struct PadInput {
                   const std::array& padding_right,
                   typename TTypes::Tensor out,
                   TensorFormat format) {
-    Eigen::array, NDIMS> padding;
-    padding[GetTensorDimIndex(format, 'N')] = std::make_pair(0, 0);
+    Eigen::array, NDIMS> padding;
+    padding[GetTensorDimIndex(format, 'N')] = {0, 0};
     for (int i = 0; i < NDIMS - 2; ++i) {
-      padding[GetTensorDimIndex(format, '0' + i)] =
-          std::make_pair(padding_left[i], padding_right[i]);
+      padding[GetTensorDimIndex(format, '0' + i)] = {
+          padding_left[i], padding_right[i]};
     }
-    padding[GetTensorDimIndex(format, 'C')] = std::make_pair(0, 0);
+    padding[GetTensorDimIndex(format, 'C')] = {0, 0};
     out.device(d) = in.pad(padding);
   }
 };
diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc
index 6e8b09d050..6196c5ed93 100644
--- a/tensorflow/core/kernels/pad_op.cc
+++ b/tensorflow/core/kernels/pad_op.cc
@@ -146,9 +146,9 @@ class PadOp : public OpKernel {
                Tensor* output) {
     CHECK_EQ(Dims, paddings.dimension(0));
     CHECK_EQ(2, paddings.dimension(1));
-    Eigen::array, Dims> paddings_array;
+    Eigen::array, Dims> paddings_array;
     for (int i = 0; i < Dims; ++i) {
-      paddings_array[i] = std::make_pair(paddings(i, 0), paddings(i, 1));
+      paddings_array[i] = {paddings(i, 0), paddings(i, 1)};
     }
     functor::Pad functor;
     functor(context->eigen_device(), output->tensor(), input,
@@ -180,7 +180,7 @@ namespace functor {
   void Pad::operator()(                               \
       const GPUDevice& d, typename TTypes::Tensor output,        \
       typename TTypes::ConstTensor input,                        \
-      Eigen::array, Dims> paddings, T pad_value); \
+      Eigen::array, Dims> paddings, T pad_value); \
   extern template struct Pad;
 
 #define DECLARE_GPU_SPECS(T) \
diff --git a/tensorflow/core/kernels/pad_op.h b/tensorflow/core/kernels/pad_op.h
index 6a973833e2..95a7c9a3ae 100644
--- a/tensorflow/core/kernels/pad_op.h
+++ b/tensorflow/core/kernels/pad_op.h
@@ -31,7 +31,7 @@ struct Pad {
   // See pad_op.cc for details.
   void operator()(const Device& d, typename TTypes::Tensor output,
                   typename TTypes::ConstTensor input,
-                  Eigen::array, Dims> paddings,
+                  Eigen::array, Dims> paddings,
                   T pad_value) {
     if (Eigen::internal::is_same::value &&
         (output.size() <= std::numeric_limits::max())) {
@@ -47,7 +47,7 @@ struct Pad {
   // In the scalar case we simply copy the input.
   void operator()(const Device& d, typename TTypes::Tensor output,
                   typename TTypes::ConstTensor input,
-                  Eigen::array, 0>, T) {
+                  Eigen::array, 0>, T) {
     output.device(d) = input;
   }
 };
-- 
GitLab


From 114e129d6c31216e5b62ef1e78a77c3ae7afe82e Mon Sep 17 00:00:00 2001
From: Francois Chollet 
Date: Wed, 30 Aug 2017 18:31:41 -0700
Subject: [PATCH 136/294] Fix TSAN flakes in Keras io_utils test.

PiperOrigin-RevId: 167081436
---
 tensorflow/contrib/keras/BUILD                |  1 +
 .../keras/python/keras/utils/io_utils_test.py | 75 ++++++++++---------
 2 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/tensorflow/contrib/keras/BUILD b/tensorflow/contrib/keras/BUILD
index a09045d7fd..26f0e41518 100644
--- a/tensorflow/contrib/keras/BUILD
+++ b/tensorflow/contrib/keras/BUILD
@@ -551,6 +551,7 @@ py_test(
     size = "small",
     srcs = ["python/keras/utils/io_utils_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["notsan"],
     deps = [
         ":keras",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py b/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py
index baa9781e71..f6820ee039 100644
--- a/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py
+++ b/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py
@@ -57,43 +57,44 @@ class TestIOUtils(test.TestCase):
     h5_path = os.path.join(temp_dir, 'test.h5')
     create_dataset(h5_path)
 
-    # Instantiating HDF5Matrix for the training set,
-    # which is a slice of the first 150 elements
-    x_train = keras.utils.io_utils.HDF5Matrix(
-        h5_path, 'my_data', start=0, end=150)
-    y_train = keras.utils.io_utils.HDF5Matrix(
-        h5_path, 'my_labels', start=0, end=150)
-
-    # Likewise for the test set
-    x_test = keras.utils.io_utils.HDF5Matrix(
-        h5_path, 'my_data', start=150, end=200)
-    y_test = keras.utils.io_utils.HDF5Matrix(
-        h5_path, 'my_labels', start=150, end=200)
-
-    # HDF5Matrix behave more or less like Numpy matrices
-    # with regard to indexing
-    self.assertEqual(y_train.shape, (150, 1))
-    # But they do not support negative indices, so don't try print(x_train[-1])
-
-    self.assertEqual(y_train.dtype, np.dtype('i'))
-    self.assertEqual(y_train.ndim, 2)
-    self.assertEqual(y_train.size, 150)
-
-    model = keras.models.Sequential()
-    model.add(keras.layers.Dense(64, input_shape=(10,), activation='relu'))
-    model.add(keras.layers.Dense(1, activation='sigmoid'))
-    model.compile(loss='binary_crossentropy', optimizer='sgd')
-
-    # Note: you have to use shuffle='batch' or False with HDF5Matrix
-    model.fit(x_train, y_train, batch_size=32, shuffle='batch', verbose=False)
-    # test that evalutation and prediction
-    # don't crash and return reasonable results
-    out_pred = model.predict(x_test, batch_size=32, verbose=False)
-    out_eval = model.evaluate(x_test, y_test, batch_size=32, verbose=False)
-
-    self.assertEqual(out_pred.shape, (50, 1))
-    self.assertEqual(out_eval.shape, ())
-    self.assertGreater(out_eval, 0)
+    with self.test_session():
+      # Instantiating HDF5Matrix for the training set,
+      # which is a slice of the first 150 elements
+      x_train = keras.utils.io_utils.HDF5Matrix(
+          h5_path, 'my_data', start=0, end=150)
+      y_train = keras.utils.io_utils.HDF5Matrix(
+          h5_path, 'my_labels', start=0, end=150)
+
+      # Likewise for the test set
+      x_test = keras.utils.io_utils.HDF5Matrix(
+          h5_path, 'my_data', start=150, end=200)
+      y_test = keras.utils.io_utils.HDF5Matrix(
+          h5_path, 'my_labels', start=150, end=200)
+
+      # HDF5Matrix behave more or less like Numpy matrices
+      # with regard to indexing
+      self.assertEqual(y_train.shape, (150, 1))
+      # But they don't support negative indices, so don't try print(x_train[-1])
+
+      self.assertEqual(y_train.dtype, np.dtype('i'))
+      self.assertEqual(y_train.ndim, 2)
+      self.assertEqual(y_train.size, 150)
+
+      model = keras.models.Sequential()
+      model.add(keras.layers.Dense(64, input_shape=(10,), activation='relu'))
+      model.add(keras.layers.Dense(1, activation='sigmoid'))
+      model.compile(loss='binary_crossentropy', optimizer='sgd')
+
+      # Note: you have to use shuffle='batch' or False with HDF5Matrix
+      model.fit(x_train, y_train, batch_size=32, shuffle='batch', verbose=False)
+      # test that evalutation and prediction
+      # don't crash and return reasonable results
+      out_pred = model.predict(x_test, batch_size=32, verbose=False)
+      out_eval = model.evaluate(x_test, y_test, batch_size=32, verbose=False)
+
+      self.assertEqual(out_pred.shape, (50, 1))
+      self.assertEqual(out_eval.shape, ())
+      self.assertGreater(out_eval, 0)
 
 
 if __name__ == '__main__':
-- 
GitLab


From 9acea81c16c222f145515354f6176852dfdb03d7 Mon Sep 17 00:00:00 2001
From: Benoit Steiner 
Date: Wed, 30 Aug 2017 18:37:32 -0700
Subject: [PATCH 137/294] Annotate the graph properties with input values if
 they're known statically.

PiperOrigin-RevId: 167081910
---
 tensorflow/core/grappler/costs/graph_properties.cc   | 12 ++++++++++++
 .../core/grappler/costs/graph_properties_test.cc     |  9 +++++++++
 2 files changed, 21 insertions(+)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index 0ab6aff250..1b1c88f2df 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -396,6 +396,18 @@ Status GraphProperties::InferStatically() {
       }
       input_properties.push_back(properties);
     }
+    for (const auto& edge : node->in_edges()) {
+      if (!edge->src()->IsConstant()) {
+        continue;
+      }
+      const int input_id = edge->dst_input();
+      if (input_id >= input_properties.size()) {
+        continue;
+      }
+      const NodeDef& node = edge->src()->def();
+      const TensorProto& raw_val = node.attr().at("value").tensor();
+      *input_properties[input_id].mutable_value() = raw_val;
+    }
     input_properties_[node->name()] = input_properties;
 
     // TODO(bsteiner): share this code with the input processing above.
diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index 954c5ead8f..461e58cf73 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -345,6 +345,15 @@ TEST_F(GraphPropertiesTest, MergeWithoutLoops) {
     EXPECT_EQ(DT_FLOAT, prop.dtype());
     EXPECT_EQ(expected_outputs[i], PropToString(prop));
   }
+
+  // The "Less" node should be fed by 2 int32 scalar constant values.
+  const auto props = properties.GetInputProperties("Less");
+  EXPECT_EQ(2, props.size());
+  for (int i = 0; i < props.size(); ++i) {
+    EXPECT_EQ(DT_INT32, props[i].dtype());
+    EXPECT_TRUE(props[i].has_value());
+    EXPECT_EQ("int32: []", PropToString(props[i]));
+  }
 }
 
 TEST_F(GraphPropertiesTest, WhileLoop) {
-- 
GitLab


From 424aa9aa9559f6fa29d8ccf3d74ff25528b39209 Mon Sep 17 00:00:00 2001
From: Alexandre Passos 
Date: Wed, 30 Aug 2017 19:51:38 -0700
Subject: [PATCH 138/294] Eager-graph mode should work with gradient
 computation.

PiperOrigin-RevId: 167086826
---
 tensorflow/python/BUILD                        |  4 +++-
 tensorflow/python/eager/backprop.py            |  6 +-----
 tensorflow/python/eager/function_test.py       | 14 ++++++++++++++
 tensorflow/python/framework/op_def_library.py  |  1 +
 tensorflow/python/ops/resource_variable_ops.py | 12 +++++++++++-
 5 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index b75f79fbf4..98dce82ee3 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -1766,6 +1766,8 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":array_ops",
+        ":array_ops_gen",
+        ":dtypes",
         ":framework_ops",
         ":resource_variable_ops_gen",
         ":tensor_shape",
@@ -1775,7 +1777,7 @@ py_library(
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:custom_gradient",
         "//tensorflow/python/eager:tape",
-        "//tensorflow/python/eager:tensor",
+        "//tensorflow/python/eager:tensor_node",
     ],
 )
 
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index ca3ad1a2c3..326f56ebf9 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -169,10 +169,6 @@ def _record_gradient(op_name, inputs, attrs, results, name):
 execute.record_gradient = _record_gradient
 
 
-def _ones(shape, dtype):
-  return array_ops.fill(shape, tensor.Tensor(1, dtype=dtype))
-
-
 def _aggregate_grads(gradients):
   """Aggregate gradients of the same tensor."""
   grad_lists = dict()
@@ -225,7 +221,7 @@ def implicit_val_and_grad(f):
                        (end_node.progenitors, repr(start_node)))
     output_gradients = kwds.get("output_gradients", None)
     if output_gradients is None:
-      output_gradients = _ones(end_node.shape, end_node.dtype)
+      output_gradients = array_ops.ones_like(end_node.value)
     grad = ag_core.backward_pass(output_gradients, end_node, start_node)
     return end_node.value, _aggregate_grads(grad.gradients)
 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 18b722e792..c15dde9e48 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.framework import function as tf_function
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
 
 
 class FunctionTest(test.TestCase):
@@ -52,6 +53,19 @@ class FunctionTest(test.TestCase):
     out = sq(t)
     self.assertAllEqual(out.numpy(), math_ops.matmul(t, t).numpy())
 
+  def testGraphModeWithGradients(self):
+    v = resource_variable_ops.ResourceVariable(1.0)
+
+    @function.defun
+    def step():
+      def inner():
+        tape.watch(v.handle)
+        return v * v
+
+      return backprop.implicit_grad(inner)()[0][1]
+
+    self.assertAllEqual(step().numpy(), 2.0)
+
   def testTensorConversionWithDefun(self):
 
     @function.defun
diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py
index aa37360066..76424ef579 100644
--- a/tensorflow/python/framework/op_def_library.py
+++ b/tensorflow/python/framework/op_def_library.py
@@ -784,6 +784,7 @@ class OpDefLibrary(object):
                               if arg.is_ref]
       with _MaybeColocateWith(must_colocate_inputs):
         # Add Op to graph
+        inputs = [ag_core.getval(x) for x in inputs]
         op = g.create_op(op_type_name, inputs, output_types, name=scope,
                          input_types=input_types, attrs=attr_protos,
                          op_def=op_def)
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 1d747f8400..1471b5909e 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -19,11 +19,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from autograd import core as ag_core
+
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.core.framework import variable_pb2
 from tensorflow.python.eager import context
 from tensorflow.python.eager import custom_gradient
 from tensorflow.python.eager import tape
+from tensorflow.python.eager import tensor_node
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -574,7 +577,14 @@ class ResourceVariable(variables.Variable):
 
     def _run_op(a, *args):
       # pylint: disable=protected-access
-      return getattr(ops.Tensor, operator)(a._AsTensor(), *args)
+      value = a._AsTensor()
+      if ag_core.isnode(value):
+        # This avoids autograd trying to wrap a ResourceVariable.
+        value = ops.convert_to_tensor(value)
+        args = [ops.convert_to_tensor(x) for x in args]
+        return getattr(tensor_node.TensorNode, operator)(value, *args)
+      else:
+        return getattr(ops.Tensor, operator)(value, *args)
 
     # Propagate __doc__ to wrapper
     try:
-- 
GitLab


From 9624d165f1f2c717eda96464fee8bf7229cc14f5 Mon Sep 17 00:00:00 2001
From: Igor Ganichev 
Date: Wed, 30 Aug 2017 21:05:14 -0700
Subject: [PATCH 139/294] Add function support to Tensorflow C API

This change adds minimal functionality. Support for FunctionOptions,
attributes, output name rewriting, function name generation, etc is
comming next.

PiperOrigin-RevId: 167091238
---
 tensorflow/c/BUILD                            |   24 +-
 tensorflow/c/c_api.cc                         |   37 +-
 tensorflow/c/c_api.h                          |  116 ++
 tensorflow/c/c_api_function.cc                |  496 ++++++++
 tensorflow/c/c_api_function_test.cc           | 1039 +++++++++++++++++
 tensorflow/c/c_api_internal.h                 |    8 +
 tensorflow/c/c_api_test.cc                    |    2 +-
 tensorflow/c/c_test_util.cc                   |  131 ++-
 tensorflow/c/c_test_util.h                    |   20 +
 tensorflow/contrib/cmake/tf_c.cmake           |    1 +
 tensorflow/core/graph/graph.cc                |   13 +-
 tensorflow/core/graph/graph.h                 |    4 +
 tensorflow/python/client/tf_session.i         |   27 +
 tensorflow/python/client/tf_session_helper.cc |   34 +
 tensorflow/python/client/tf_session_helper.h  |   10 +
 tensorflow/python/framework/function.py       |   19 +
 tensorflow/python/framework/function_test.py  |  112 +-
 tensorflow/python/framework/ops.py            |    8 +
 18 files changed, 2072 insertions(+), 29 deletions(-)
 create mode 100644 tensorflow/c/c_api_function.cc
 create mode 100644 tensorflow/c/c_api_function_test.cc

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 604dfab148..1822e235eb 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -45,8 +45,13 @@ tf_cuda_library(
 
 tf_cuda_library(
     name = "c_api",
-    srcs = ["c_api.cc"],
-    hdrs = ["c_api.h"],
+    srcs = [
+        "c_api.cc",
+        "c_api_function.cc",
+    ],
+    hdrs = [
+        "c_api.h",
+    ],
     copts = tf_copts(),
     visibility = ["//visibility:public"],
     deps = select({
@@ -157,6 +162,21 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "c_api_function_test",
+    size = "small",
+    srcs = ["c_api_function_test.cc"],
+    deps = [
+        ":c_api",
+        ":c_test_util",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 tf_cc_test(
     name = "while_loop_test",
     size = "small",
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 07c8277a6f..c454c94249 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -165,22 +165,6 @@ void deallocate_buffer(void* data, size_t len, void* arg) {
   tensorflow::cpu_allocator()->DeallocateRaw(data);
 }
 
-Status MessageToBuffer(const tensorflow::protobuf::Message& in,
-                       TF_Buffer* out) {
-  if (out->data != nullptr) {
-    return InvalidArgument("Passing non-empty TF_Buffer is invalid.");
-  }
-  const auto proto_size = in.ByteSizeLong();
-  void* buf = tensorflow::port::Malloc(proto_size);
-  in.SerializeToArray(buf, proto_size);
-  out->data = buf;
-  out->length = proto_size;
-  out->data_deallocator = [](void* data, size_t length) {
-    tensorflow::port::Free(data);
-  };
-  return Status::OK();
-}
-
 }  // namespace
 
 TF_Tensor::~TF_Tensor() { buffer->Unref(); }
@@ -559,6 +543,27 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
                       dimvec.size(), base, size, DeleteArray, base);
 }
 
+Status MessageToBuffer(const tensorflow::protobuf::Message& in,
+                       TF_Buffer* out) {
+  if (out->data != nullptr) {
+    return InvalidArgument("Passing non-empty TF_Buffer is invalid.");
+  }
+  const size_t proto_size = in.ByteSizeLong();
+  void* buf = tensorflow::port::Malloc(proto_size);
+  if (buf == nullptr) {
+    return tensorflow::errors::ResourceExhausted(
+        "Failed to allocate memory to serialize message of type '",
+        in.GetTypeName(), "' and size ", proto_size);
+  }
+  in.SerializeToArray(buf, proto_size);
+  out->data = buf;
+  out->length = proto_size;
+  out->data_deallocator = [](void* data, size_t length) {
+    tensorflow::port::Free(data);
+  };
+  return Status::OK();
+}
+
 // Helpers for loading a TensorFlow plugin (a .so file).
 Status LoadLibrary(const char* library_filename, void** result,
                    const void** buf, size_t* len);
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index 43b5078013..ee110d88ce 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -357,6 +357,14 @@ typedef struct TF_Output {
   int index;  // The index of the output within oper.
 } TF_Output;
 
+// TF_Function is a grouping of operations with defined inputs and outputs.
+// Once created and added to graphs, functions can be invoked by creating an
+// operation whose operation type matches the function name.
+typedef struct TF_Function TF_Function;
+
+// Function definition options. TODO(iga): Define and implement
+typedef struct TF_FunctionOptions TF_FunctionOptions;
+
 // Sets the shape of the Tensor referenced by `output` in `graph` to
 // the shape described by `dims` and `num_dims`.
 //
@@ -914,6 +922,15 @@ TF_CAPI_EXPORT extern void TF_GraphImportGraphDef(
     TF_Graph* graph, const TF_Buffer* graph_def,
     const TF_ImportGraphDefOptions* options, TF_Status* status);
 
+// Add `function` to graph `g`. Once `function` is added to `g`,
+// it can be called by creating an operation using the function's name.
+//
+// If successful, status is set to OK and function is added to g
+// Otherwise, status is set to the encountered error and g is unmodified
+TF_CAPI_EXPORT extern void TF_GraphAddFunction(TF_Graph* g,
+                                               const TF_Function* function,
+                                               TF_Status* status);
+
 // Note: The following function may fail on very large protos in the future.
 
 TF_CAPI_EXPORT extern void TF_OperationToNodeDef(TF_Operation* oper,
@@ -1001,6 +1018,105 @@ TF_CAPI_EXPORT void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny,
                                     TF_Output* x, int nx, TF_Output* dx,
                                     TF_Status* status, TF_Output* dy);
 
+// Create a TF_Function from a TF_Graph
+//
+// Params:
+//  fn_body - the graph whose operations (or subset of whose operations) will be
+//            converted to TF_Function.
+//  fn_name - the name of the new TF_Function. Should match the operation
+//            name (OpDef.name) regexp [A-Z][A-Za-z0-9_.\\-/]* and be distinct
+//            from other operation names (at least those registered in graphs
+//            where this function will be used).
+//            TODO(iga): Allow null in here and have C API come up with
+//            a unique name with high probability (similarly to
+//            _create_hash_str in function.py)
+//  num_opers - `num_opers` contains the number of elements in the `opers` array
+//              or a special value of -1 meaning that no array is given.
+//              The distinction between an empty array of operations and no
+//              array of operations is necessary to distinguish the case of
+//              creating a function with no body (e.g. identity or permutation)
+//              and the case of creating a function whose body contains all
+//              the nodes in the graph (except for the automatic skipping, see
+//              below).
+//  opers - Array of operations to become the body of the function or null.
+//          - If no array is given (`num_opers`  = -1), all the
+//          operations in `fn_body` will become part of the function
+//          except operations referenced in `inputs`. These operations
+//          must have a single output (these operations are typically
+//          placeholders created for the sole purpose of representing
+//          an input. We can relax this constraint if there are
+//          compelling use cases).
+//          - If an array is given (`num_opers` >= 0), all operations
+//          in it will become part of the function. In particular, no
+//          automatic skipping of dummy input operations is performed.
+//  ninputs - number of elements in `inputs` array
+//  inputs - array of TF_Outputs that specify the inputs to the function.
+//           If `ninputs` is zero (the function takes no inputs), `inputs`
+//           can be null. The names used for function inputs are normalized
+//           names of the operations (usually placeholders) pointed to by
+//           `inputs`. These operation names should start with a letter.
+//           Normalization will convert all letters to lowercase and
+//           non-alphanumeric characters to '_' to make resulting names match
+//           the "[a-z][a-z0-9_]*" pattern for operation argument names.
+//           `inputs` cannot contain the same tensor twice.
+//  noutputs - number of elements in `outputs` array
+//  outputs - array of TF_Outputs that specify the outputs of the function.
+//            If `noutputs` is zero (the function returns no outputs), `outputs`
+//            can be null. `outputs` can contain the same tensor more than once.
+//  output_names - The names of the function's outputs. `output_names` array
+//                 must either have the same length as `outputs`
+//                 (i.e. `noutputs`) or be null. In the former case,
+//                 the names should match the regular expression for ArgDef
+//                 names - "[a-z][a-z0-9_]*". In the latter case,
+//                 names for outputs will be generated automatically.
+//  opts - various options for the function, e.g. XLA's inlining control.
+//  status - Set to OK on success and an appropriate error on failure.
+//
+// Note that when the same TF_Output is listed as both an input and an output,
+// the corresponding function's output will equal to this input,
+// instead of the original node's output.
+//
+// Callers must also satisfy the following constraints:
+// - `inputs` cannot refer to TF_Outputs within a control flow context. For
+//   example, one cannot use the output of "switch" node as input.
+// - No TF_Output of a function (inside any of `inputs`, `outputs`, `fn_body`)
+//   is allowed to have a reference type. Reference types are not exposed
+//   through C API and are being deprecated.
+// - Every node in the function's body must have all of its inputs (including
+//   control inputs). In other words, for every node in the body, each input
+//   must be either listed in `inputs` or must come from another node in
+//   the body. In particular, it is an error to have a control edge going from
+//   a node outside of the body into a node in the body. This applies to control
+//   edges going from nodes referenced in `inputs` to nodes in the body when
+//   the former nodes are not in the body (automatically skipped or not
+//   included in explicitly specified body).
+//
+// Returns:
+//  On successful, a newly created TF_Function instance. It must be deleted by
+//  calling TF_DeleteFunction.
+//
+//  On failure, null.
+//
+// TODO(iga): Add input_names argument and get output_names working (they are
+// currently ignored)
+TF_CAPI_EXPORT extern TF_Function* TF_GraphToFunction(
+    const TF_Graph* fn_body, const char* fn_name, int num_opers,
+    const TF_Operation* const* opers, int ninputs, const TF_Output* inputs,
+    int noutputs, const TF_Output* outputs, const char* const* output_names,
+    const TF_FunctionOptions* opts, TF_Status* status);
+
+// Write out a serialized representation of `func` (as a FunctionDef protocol
+// message) to `output_func_def` (allocated by TF_NewBuffer()).
+// `output_func_def`'s underlying buffer will be freed when TF_DeleteBuffer()
+// is called.
+//
+// May fail on very large graphs in the future.
+TF_CAPI_EXPORT extern void TF_FunctionToFunctionDef(TF_Function* func,
+                                                    TF_Buffer* output_func_def,
+                                                    TF_Status* status);
+
+TF_CAPI_EXPORT extern void TF_DeleteFunction(TF_Function*);
+
 // TODO(josh11b): Register OpDef, available to all operations added
 // to this graph.
 
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
new file mode 100644
index 0000000000..b4c6397d0b
--- /dev/null
+++ b/tensorflow/c/c_api_function.cc
@@ -0,0 +1,496 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/c/c_api_internal.h"
+
+#include 
+#include 
+#include 
+
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+
+namespace tensorflow {
+namespace {
+
+// Class that maintains a one-to-one original node name -> new node name
+// mapping. We normalize the names used as input and output arguments to match
+// regexp "[a-z][a-z0-9_]*" specified in definition of ArgDef.name.
+// Once we rename them, we risk creating a name collision with the other
+// node names, so if necessary we add a suffix to make
+// names unique. If we have an input named "A" and a node in the function
+// body named "a", they will be renamed to "a" and "a_0".
+class NodeNameMapping {
+ public:
+  NodeNameMapping() = default;
+
+  // Normalize the input/output name and make it unique.
+  string GetIOName(const string& name);
+
+  // Make the node name unique.
+  string Uniquify(const string& name);
+
+  // Look up how a node name was previously normalized/uniquified.
+  // Returns empty if name was never seen.
+  string Lookup(const string& name) const;
+
+ private:
+  string UniquifyHelper(const string& name) const;
+  static string Normalize(string name);
+
+  // The normalized/uniquified names already used as
+  // input names (in signature), output names (in signature), and node names
+  // (in node_def).
+  // This is a superset of values in name_mapping_.
+  std::unordered_set used_names_;
+  // Mapping from original node name from the graph to the normalized
+  // and uniqified version of it.
+  std::unordered_map name_mapping_;
+};
+
+string NodeNameMapping::Normalize(string name) {
+  // Convert letters to lowercase and non-alphanumeric characters to '_'.
+  if (name.empty()) return "unknown";
+  const int n = name.size();
+  for (int i = 0; i < n; ++i) {
+    char c = name[i];
+    if (isalnum(c)) {
+      if (isupper(c)) {
+        name[i] = tolower(c);
+      }
+    } else {
+      name[i] = '_';
+    }
+  }
+
+  // Find the first letter and start with it.
+  int i = 0;
+  for (; i < n; ++i) {
+    if (isalpha(name[i])) break;
+  }
+
+  // Return "unknown" if none of the name's chars were letters.
+  return i == n ? "unknown" : name.substr(i);
+}
+
+string NodeNameMapping::UniquifyHelper(const string& name) const {
+  // If the name hasn't been used yet, use it as-is.
+  if (used_names_.find(name) == used_names_.end()) return name;
+  // Add a suffix to name to make it unique.
+  for (int i = 0;; ++i) {
+    const string candidate = strings::StrCat(name, "_", i);
+    if (used_names_.find(candidate) == used_names_.end()) return candidate;
+  }
+}
+
+string NodeNameMapping::GetIOName(const string& name) {
+  const string& input_name = UniquifyHelper(Normalize(name));
+  // Record that we used this name, but don't add it to name_mapping_
+  // since this name is not for a node.
+  used_names_.insert(input_name);
+  return input_name;
+}
+
+string NodeNameMapping::Uniquify(const string& name) {
+  const string uniqued = UniquifyHelper(name);
+  name_mapping_[name] = uniqued;
+  used_names_.insert(uniqued);
+  return uniqued;
+}
+
+string NodeNameMapping::Lookup(const string& name) const {
+  const auto iter = name_mapping_.find(name);
+  if (iter == name_mapping_.end()) return string();
+  return iter->second;
+}
+
+Status ValidateNoRefOutputs(const Node* node) {
+  for (int i = 0; i < node->num_outputs(); ++i) {
+    const DataType& dt = node->output_type(i);
+    if (IsRefType(dt)) {
+      return errors::InvalidArgument("Output ", i, " of node '", node->name(),
+                                     "' has a reference "
+                                     "type ",
+                                     DataTypeString(dt));
+    }
+  }
+  return Status::OK();
+}
+
+Status FillFunctionBody(
+    const string& fn_name, const NodeNameMapping& node_names,
+    const std::vector& body_nodes,
+    const std::unordered_map& tensor_renaming,
+    FunctionDef* fdef) {
+  std::vector in_edges;
+  std::vector control_edges;
+  for (const Node* node : body_nodes) {
+    NodeDef* node_def = fdef->add_node_def();
+    // First, copy the node_def as is. We will patch it next.
+    *node_def = node->def();
+    if (!node->assigned_device_name().empty()) {
+      node_def->set_device(node->assigned_device_name());
+    }
+    node_def->set_name(node_names.Lookup(node->name()));
+
+    // Input names must be set based on nested names in tensor_renaming.
+    // Clear the flat input names we got from the original node_def
+    // from the graph.
+    node_def->clear_input();
+
+    // Collect regular and control inputs. Regular inputs are indexed
+    // by the index at which they come into the `node`. Control inputs
+    // don't follow any order.
+    in_edges.clear();
+    in_edges.resize(node->num_inputs(), nullptr);
+    control_edges.clear();
+    for (const Edge* edge : node->in_edges()) {
+      if (edge->src()->IsSource()) continue;
+      if (edge->IsControlEdge()) {
+        control_edges.push_back(edge);
+      } else {
+        in_edges[edge->dst_input()] = edge;
+      }
+    }
+
+    // Add regular inputs.
+    for (size_t i = 0; i < in_edges.size(); ++i) {
+      const Edge* edge = in_edges[i];
+      string original_input_name;
+      if (edge == nullptr) {
+        // A backedge might not appear as a regular Edge, but be only present
+        // in the node_def. Such edges are referred to as requested_inputs().
+        if (i >= node->requested_inputs().size()) {
+          return errors::InvalidArgument(
+              "Graph to be converted to function appears to be malformed. ",
+              "Node ", node->name(), " is missing input edge ", i);
+        }
+        original_input_name =
+            ParseTensorName(node->requested_inputs()[i]).ToString();
+      } else {
+        original_input_name =
+            strings::StrCat(edge->src()->name(), ":", edge->src_output());
+      }
+
+      const auto iter = tensor_renaming.find(original_input_name);
+      if (iter == tensor_renaming.end()) {
+        return errors::InvalidArgument(
+            "Input ", i, ", '", original_input_name, "', of node '",
+            node->name(), "' in function '", fn_name,
+            "' is not available. You might need to include it in inputs "
+            "or include its source node in the body");
+      }
+      node_def->add_input(iter->second);
+    }
+
+    // Add control inputs.
+    for (const Edge* edge : control_edges) {
+      // Add this control input only if the src node is in the body.
+      const string normalized = node_names.Lookup(edge->src()->name());
+      // If we did not find a name for the source of control edge, this
+      // source must be outside of the body. Raise an error.
+      if (normalized.empty()) {
+        return errors::InvalidArgument(
+            "The source of control edge ", edge->DebugString(),
+            " is not in the body. Encountered while creating function '",
+            fn_name, "'");
+      }
+      node_def->add_input(strings::StrCat("^", normalized));
+    }
+  }
+  return Status::OK();
+}
+
+// Graph to FunctionDef conversion. This code is closely modeled on the Python
+// code in third_party/tensorflow/python/framework/function.py.
+Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name,
+                          const std::vector& body_nodes,
+                          const std::vector& inputs,
+                          const std::vector& outputs,
+                          const std::vector& output_names,
+                          FunctionDef* fdef) {
+  fdef->mutable_signature()->set_name(fn_name);
+
+  // Keep track of names we used and how we normalized them.
+  NodeNameMapping node_names;
+
+  // Mapping from original names of tensors (i.e. ":") to the
+  // name we used in the function:
+  //  - For input tensors:
+  //    {flat_tensor_name -> normalized_name_of_src_node}
+  //    e.g. {In:3 -> in}
+  //  - For tensors produced by nodes in function's body:
+  //    {flat_tensor_name -> nested_tensor_name}
+  //    e.g. {Add:3 -> add_0:z:1}
+  std::unordered_map tensor_renaming;
+
+  // Fill inputs in function's signature.
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    const Node* node = inputs[i].node;
+    int idx = inputs[i].index;
+    OpDef::ArgDef* argdef = fdef->mutable_signature()->add_input_arg();
+    argdef->set_type(node->output_type(idx));
+    const string& input_name = node_names.GetIOName(node->name());
+    argdef->set_name(input_name);
+    tensor_renaming[strings::StrCat(node->name(), ":", idx)] = input_name;
+  }
+
+  // Fill outputs in function's signature.
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    const Node* node = outputs[i].node;
+    int idx = outputs[i].index;
+    OpDef::ArgDef* argdef = fdef->mutable_signature()->add_output_arg();
+    argdef->set_type(node->output_type(idx));
+    argdef->set_name(node_names.GetIOName(node->name()));
+  }
+
+  // Populate tensor_renaming and node_names.
+  // Generate the new output names for every node in the function.
+  // The NodeDefs in FunctionDefs use a different naming scheme for
+  // their inputs than the NodeDefs in a graph (see the comment for
+  // FunctionDef.node_def in function.proto). We do the
+  // graph tensor name -> function tensor name conversion for every
+  // possible input (i.e. every node's outputs) and store the result
+  // in tensor_renaming.
+  for (const Node* node : body_nodes) {
+    // Make sure node_name does not collide with an input or output name.
+    const string& node_name = node_names.Uniquify(node->name());
+    // For each output_arg in the op_def, the output_ranges
+    // map will have [start, end] range of indices that this arg produces
+    // among all the output tensors of this op.
+    NameRangeMap output_ranges;
+    TF_RETURN_IF_ERROR(
+        NameRangesForNode(*node, node->op_def(), nullptr, &output_ranges));
+    for (const auto& output : output_ranges) {
+      const string& output_name = output.first;
+      int index_start = output.second.first;
+      int index_end = output.second.second;
+      for (int i = index_start; i < index_end; ++i) {
+        const string& original_name = strings::StrCat(node->name(), ":", i);
+        const string& new_name =
+            strings::StrCat(node_name, ":", output_name, ":", i - index_start);
+        // Record the mapping if this tensor is not already mapped.
+        // Tensor can be already mapped if it is used as an input.
+        if (tensor_renaming.find(original_name) == tensor_renaming.end()) {
+          tensor_renaming[original_name] = new_name;
+        }
+      }
+    }
+  }
+
+  TF_RETURN_IF_ERROR(
+      FillFunctionBody(fn_name, node_names, body_nodes, tensor_renaming, fdef));
+
+  // Remap return values.
+  for (int r = 0; r < fdef->signature().output_arg_size(); ++r) {
+    const string& ret_name = fdef->signature().output_arg(r).name();
+
+    // We convert this flat tensor name to the nested value
+    // (e.g. `add:z:1`) that we stored in tensor_renaming.
+    const string& return_value =
+        strings::StrCat(outputs[r].node->name(), ":", outputs[r].index);
+    const auto iter = tensor_renaming.find(return_value);
+    if (iter == tensor_renaming.end()) {
+      return errors::InvalidArgument(
+          "TF_Output ", return_value, " is neither in the function body ",
+          "nor among function inputs. Encountered while creating function '",
+          fn_name, "'");
+    }
+    (*fdef->mutable_ret())[ret_name] = iter->second;
+  }
+
+  return Status::OK();
+}
+
+// Converts `ninputs` and `inputs` into `inputs_tensors` and `input_nodes` and
+// does various checks while doing so. `input_nodes` will contain the same
+// information as input_tensors just in a different structure to make
+// following processing easier. TODO(iga): Simplify this nested structure.
+Status ProcessInputs(
+    const TF_Graph* fn_body, const char* fn_name, int ninputs,
+    const TF_Output* inputs, std::vector* input_tensors,
+    std::unordered_map>* input_nodes)
+    EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) {
+  input_tensors->reserve(ninputs);
+  for (int i = 0; i < ninputs; ++i) {
+    const Node& node = inputs[i].oper->node;
+    int idx = inputs[i].index;
+
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(
+        fn_body->graph.IsValidOutputTensor(&node, idx),
+        "Encountered while processing input ", i, " into function '", fn_name,
+        "'");
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(ValidateNoRefOutputs(&node),
+                                    "Encountered while processing input ", i,
+                                    " into function '", fn_name, "'");
+
+    input_tensors->emplace_back(&node, idx);
+
+    const auto& iter = input_nodes->find(&node);
+    if (iter == input_nodes->end()) {
+      input_nodes->insert({&node, {idx}});
+    } else {
+      auto& indices = iter->second;
+      if (std::find(indices.begin(), indices.end(), idx) != indices.end()) {
+        return errors::InvalidArgument(
+            "TF_Output ", node.name(), ":", idx,
+            " appears more than once in the input list");
+      }
+      indices.push_back(idx);
+    }
+  }
+  return Status::OK();
+}
+
+// Converts `noutputs` and `outputs` into `outputs_tensors` and does various
+// checks while doing so.
+Status ProcessOutputs(const TF_Graph* fn_body, const char* fn_name,
+                      int noutputs, const TF_Output* outputs,
+                      std::vector* output_tensors)
+    EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) {
+  output_tensors->reserve(noutputs);
+  for (int i = 0; i < noutputs; ++i) {
+    const Node& node = outputs[i].oper->node;
+    int idx = outputs[i].index;
+    TF_RETURN_WITH_CONTEXT_IF_ERROR(
+        fn_body->graph.IsValidOutputTensor(&node, idx),
+        "Encountered while processing output ", i, " from function '", fn_name,
+        "'");
+    output_tensors->emplace_back(&node, idx);
+  }
+  return Status::OK();
+}
+
+// Populates `body_nodes` with the nodes that will become function's body.
+// Performs various checks.
+Status ComputeBodyNodes(
+    const TF_Graph* fn_body, const char* fn_name, int num_opers,
+    const TF_Operation* const* opers,
+    const std::unordered_map>& input_nodes,
+    std::vector* body_nodes)
+    EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) {
+  if (num_opers == -1) {
+    for (const Node* node : fn_body->graph.op_nodes()) {
+      const auto& iter = input_nodes.find(node);
+      if (iter == input_nodes.end()) {
+        // This node is not referenced in inputs. Add it to the body.
+        TF_RETURN_WITH_CONTEXT_IF_ERROR(ValidateNoRefOutputs(node),
+                                        "Encountered while creating function '",
+                                        fn_name, "'");
+        body_nodes->push_back(node);
+      } else {
+        // This node is referenced in inputs. Currently, we place an
+        // artificial restriction and require that when num_opers=-1, such
+        // nodes must have a single output.
+        if (node->num_outputs() != 1) {
+          return errors::InvalidArgument(
+              "When `num_opers` is set to -1, nodes referenced in `inputs` "
+              "must have a single output. Node ",
+              node->name(), " has ", node->num_outputs(),
+              " outputs. Encountered while creating function '", fn_name, "'");
+        }
+      }
+    }
+  } else {
+    body_nodes->reserve(num_opers);
+    for (int i = 0; i < num_opers; ++i) {
+      const Node* node = &opers[i]->node;
+      TF_RETURN_WITH_CONTEXT_IF_ERROR(ValidateNoRefOutputs(node),
+                                      "Encountered while creating function '",
+                                      fn_name, "'");
+      body_nodes->push_back(node);
+    }
+  }
+  return Status::OK();
+}
+
+}  // anonymous namespace
+}  // namespace tensorflow
+
+using tensorflow::Node;
+using tensorflow::string;
+
+TF_Function* TF_GraphToFunction(const TF_Graph* fn_body, const char* fn_name,
+                                int num_opers, const TF_Operation* const* opers,
+                                int ninputs, const TF_Output* inputs,
+                                int noutputs, const TF_Output* outputs,
+                                const char* const* output_names,
+                                const TF_FunctionOptions* opts,
+                                TF_Status* status) {
+  tensorflow::mutex_lock l(*const_cast(&fn_body->mu));
+
+  // Process inputs.
+  std::vector input_tensors;
+  std::unordered_map> input_nodes;
+  status->status = tensorflow::ProcessInputs(fn_body, fn_name, ninputs, inputs,
+                                             &input_tensors, &input_nodes);
+  if (!status->status.ok()) return nullptr;
+
+  // Process outputs.
+  std::vector output_tensors;
+  status->status = tensorflow::ProcessOutputs(fn_body, fn_name, noutputs,
+                                              outputs, &output_tensors);
+  if (!status->status.ok()) return nullptr;
+
+  // Process output names.
+  std::vector output_names_vec;
+  if (output_names) {
+    output_names_vec.reserve(noutputs);
+    for (int i = 0; i < noutputs; ++i) {
+      output_names_vec.push_back(string(output_names[i]));
+    }
+  }
+
+  // Compute body nodes.
+  std::vector body_nodes;
+  status->status = tensorflow::ComputeBodyNodes(
+      fn_body, fn_name, num_opers, opers, input_nodes, &body_nodes);
+  if (!status->status.ok()) return nullptr;
+
+  // Do the actual function creation.
+  TF_Function* tf_function = new TF_Function();
+  status->status = tensorflow::GraphToFunctionDef(
+      fn_body->graph, fn_name, body_nodes, input_tensors, output_tensors,
+      output_names_vec, tf_function->fdef_lib.add_function());
+  if (!status->status.ok()) {
+    TF_DeleteFunction(tf_function);
+    return nullptr;
+  }
+  return tf_function;
+}
+
+void TF_GraphAddFunction(TF_Graph* g, const TF_Function* function,
+                         TF_Status* status) {
+  tensorflow::mutex_lock l(g->mu);
+
+  // At the moment, we have only one function and no gradients in fdef_lib.
+  // This makes the following operation atomic.
+  // TODO(iga): Add an atomic version of AddFunctionLibrary when we support
+  // gradients
+  status->status = g->graph.AddFunctionLibrary(function->fdef_lib);
+}
+
+void TF_FunctionToFunctionDef(TF_Function* func, TF_Buffer* output_func_def,
+                              TF_Status* status) {
+  DCHECK_EQ(1, func->fdef_lib.function_size());
+  status->status = MessageToBuffer(func->fdef_lib.function(0), output_func_def);
+}
+
+void TF_DeleteFunction(TF_Function* function) { delete function; }
diff --git a/tensorflow/c/c_api_function_test.cc b/tensorflow/c/c_api_function_test.cc
new file mode 100644
index 0000000000..c9dd38ea15
--- /dev/null
+++ b/tensorflow/c/c_api_function_test.cc
@@ -0,0 +1,1039 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/c/c_api.h"
+
+#include "tensorflow/c/c_test_util.h"
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace {
+
+// Specification for expected input/output and its type.
+// DataType value of DT_INVALID signifies that we don't want to
+// check the data type.
+typedef std::pair IOSpec;
+
+std::vector M(const std::initializer_list& names) {
+  std::vector v;
+  for (const string& name : names) {
+    v.push_back(IOSpec(name, DT_INVALID));
+  }
+  return v;
+}
+
+// Specification for an expected edge.
+// src is either:
+// - input name (as it appears in FunctionDef)
+// - name of output tensor (in nested "add:z:0" format)
+// dst is either:
+// - output name (as it appears in FunctionDef)
+// - : (this looks the same as
+//      output tensor naming, but it the index is actually an input index)
+struct EdgeSpec : public std::pair {
+  typedef std::pair Base;
+
+  // Inherit the set of constructors
+  using Base::pair;
+
+  string ToString() const { return strings::StrCat(first, "->", second); }
+};
+
+class CApiFunctionTest : public ::testing::Test {
+ protected:
+  CApiFunctionTest()
+      : s_(TF_NewStatus()),
+        func_graph_(TF_NewGraph()),
+        host_graph_(TF_NewGraph()),
+        func_(nullptr) {}
+
+  void SetUp() override {}
+
+  ~CApiFunctionTest() override {
+    TF_DeleteFunction(func_);
+    TF_DeleteGraph(host_graph_);
+    TF_DeleteGraph(func_graph_);
+    TF_DeleteStatus(s_);
+  }
+
+  void Run(const std::vector>& inputs,
+           TF_Operation* output, int32_t expected_result) {
+    Run(inputs, {{output, 0}}, {expected_result});
+  }
+
+  // Run the host graph, which now contains a function and check that
+  // outputs are as expected.
+  // 'T' stands for 'tensor' since the outputs are tensors, not scalars.
+  void RunT(const std::vector>& inputs,
+            std::initializer_list outputs,
+            const std::vector>& expected_results) {
+    // Create a session for this graph
+    CSession csession(host_graph_, s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+    // Run
+    csession.SetInputs(inputs);
+    csession.SetOutputs(outputs);
+    csession.Run(s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+    // Check results
+    for (int i = 0; i < expected_results.size(); ++i) {
+      TF_Tensor* out = csession.output_tensor(i);
+      ASSERT_TRUE(out != nullptr);
+      EXPECT_EQ(TF_INT32, TF_TensorType(out));
+      EXPECT_EQ(1, TF_NumDims(out));
+      CompareInt32Tensor(expected_results[i], out);
+    }
+  }
+
+  // Run the host graph, which now contains a function and check that
+  // outputs are as expected.
+  void Run(const std::vector>& inputs,
+           std::initializer_list outputs,
+           const std::vector& expected_results) {
+    // Create a session for this graph.
+    CSession csession(host_graph_, s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+    csession.SetInputs(inputs);
+    csession.SetOutputs(outputs);
+    csession.Run(s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+    for (int i = 0; i < expected_results.size(); ++i) {
+      TF_Tensor* out = csession.output_tensor(i);
+      ASSERT_TRUE(out != nullptr);
+      EXPECT_EQ(TF_INT32, TF_TensorType(out));
+      EXPECT_EQ(0, TF_NumDims(out));  // scalar
+      ASSERT_EQ(sizeof(int32_t), TF_TensorByteSize(out));
+      int32_t* output_contents = static_cast(TF_TensorData(out));
+      EXPECT_EQ(expected_results[i], *output_contents);
+    }
+  }
+
+  void CompareInt32Tensor(const std::vector& expected, TF_Tensor* t) {
+    int32_t* data = static_cast(TF_TensorData(t));
+    size_t size = TF_TensorByteSize(t);
+    ASSERT_EQ(expected.size() * sizeof(int32_t), size);
+    for (int i = 0; i < expected.size(); ++i) {
+      ASSERT_EQ(expected[i], data[i]) << "Different data at index " << i;
+    }
+  }
+
+  std::vector ToOutput(const std::vector ops) {
+    std::vector out;
+    for (auto op : ops) {
+      out.push_back({op, 0});
+    }
+    return out;
+  }
+
+  void Define(int num_opers, const std::vector& opers,
+              const std::vector& inputs,
+              const std::vector& outputs,
+              const char** output_names, bool expect_failure = false) {
+    DefineT(num_opers, opers, ToOutput(inputs), ToOutput(outputs), output_names,
+            expect_failure);
+  }
+
+  // An explicit `num_opers` is needed so that we can distinguish between the
+  // case of no operations specified (-1) and the case of an empty set of
+  // operations specified (0).
+  void DefineT(int num_opers, const std::vector& opers,
+               const std::vector& inputs,
+               const std::vector& outputs, const char** output_names,
+               bool expect_failure = false) {
+    ASSERT_EQ(func_, nullptr);
+    func_ = TF_GraphToFunction(func_graph_, func_name_, num_opers,
+                               num_opers == -1 ? nullptr : opers.data(),
+                               inputs.size(), inputs.data(), outputs.size(),
+                               outputs.data(), output_names,
+                               /*opts=*/nullptr, s_);
+    if (expect_failure) {
+      ASSERT_EQ(func_, nullptr);
+      return;
+    }
+
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+    ASSERT_NE(func_, nullptr);
+    TF_GraphAddFunction(host_graph_, func_, s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  }
+
+  TF_Operation* Use(const std::vector& inputs) {
+    return UseT(ToOutput(inputs));
+  }
+
+  TF_Operation* UseT(const std::vector& inputs) {
+    TF_Operation* op;
+    UseHelper(inputs, &op);
+    return op;
+  }
+
+  // All the *Helper methods are used as a workaround for the restrictions that
+  // one cannot call ASSERT_* methods in non-void-returning functions (when
+  // exceptions are disabled during compilation)
+  void UseHelper(const std::vector& inputs, TF_Operation** op) {
+    TF_OperationDescription* desc =
+        TF_NewOperation(host_graph_, func_name_, func_node_name_);
+    for (auto input : inputs) {
+      TF_AddInput(desc, input);
+    }
+    // Set device to CPU because some ops inside the function might not be
+    // available on GPU.
+    TF_SetDevice(desc, "/cpu:0");
+    *op = TF_FinishOperation(desc, s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+    ASSERT_NE(*op, nullptr);
+  }
+
+  FunctionDef fdef() {
+    tensorflow::FunctionDef fdef;
+    EXPECT_TRUE(GetFunctionDef(func_, &fdef));
+    return fdef;
+  }
+
+  // logging utility
+  template 
+  string ToString(const Container& v) {
+    std::stringstream ss;
+    ss << "{";
+    size_t i = 0;
+    for (const auto& e : v) {
+      if (i != 0) {
+        ss << ", ";
+      }
+      ss << e.ToString();
+      ++i;
+    }
+    ss << "}";
+    return ss.str();
+  }
+
+  void VerifyFDefNodes(const tensorflow::FunctionDef& fdef,
+                       const std::unordered_set& nodes) {
+    ASSERT_EQ(nodes.size(), fdef.node_def_size())
+        << "Got unexpected number of nodes. Expected: ["
+        << str_util::Join(nodes, ", ")
+        << "] Actual nodes in fdef: " << fdef.DebugString();
+    for (const NodeDef& node_def : fdef.node_def()) {
+      ASSERT_TRUE(nodes.find(node_def.name()) != nodes.end())
+          << "Got unexpected node: " << node_def.name()
+          << " in fdef: " << fdef.DebugString();
+    }
+  }
+
+  void VerifyFDefInputs(const tensorflow::FunctionDef& fdef,
+                        const std::vector& inputs) {
+    const OpDef& signature = fdef.signature();
+    ASSERT_EQ(inputs.size(), signature.input_arg_size());
+    for (int i = 0; i < inputs.size(); ++i) {
+      const OpDef::ArgDef& arg = signature.input_arg(i);
+      const IOSpec& in = inputs[i];
+      if (in.second != DT_INVALID) {
+        ASSERT_EQ(arg.type(), in.second)
+            << "Got unexpected type for input " << i
+            << ". fdef: " << fdef.DebugString();
+      }
+      ASSERT_EQ(arg.name(), in.first) << "Got unexpected name for input " << i
+                                      << ". fdef: " << fdef.DebugString();
+    }
+  }
+
+  void VerifyFDefOutputs(const tensorflow::FunctionDef& fdef,
+                         const std::vector& outputs) {
+    const OpDef& signature = fdef.signature();
+    ASSERT_EQ(outputs.size(), signature.output_arg_size());
+    for (int i = 0; i < outputs.size(); ++i) {
+      const OpDef::ArgDef& arg = signature.output_arg(i);
+      const IOSpec& out = outputs[i];
+      if (out.second != DT_INVALID) {
+        ASSERT_EQ(arg.type(), out.second)
+            << "Got unexpected type for output " << i
+            << ". fdef: " << fdef.DebugString();
+      }
+      ASSERT_EQ(arg.name(), out.first) << "Got unexpected name for output " << i
+                                       << ". fdef: " << fdef.DebugString();
+    }
+  }
+
+  void VerifyFDefEdges(
+      const tensorflow::FunctionDef& fdef,
+      const std::vector& e_edges,  // expected edges
+      const std::vector& c_edges,  // expected ctrl edges
+      bool is_exact_edges = true) {
+    // Build a set of edges from fdef
+    std::set a_edges;  // actual edges
+    // Get edges from inputs to body nodes and between body nodes
+    for (const NodeDef& node_def : fdef.node_def()) {
+      for (int i = 0; i < node_def.input_size(); ++i) {
+        const string& in = node_def.input(i);
+        const auto& v =
+            a_edges.insert({in, strings::StrCat(node_def.name(), ":", i)});
+        ASSERT_TRUE(v.second) << "Duplicate edge " << in << " -> "
+                              << strings::StrCat(node_def.name(), ":", i)
+                              << ". fdef: " << fdef.DebugString();
+      }
+    }
+    // Get edges from body nodes to outputs and from inputs to outputs
+    for (const OpDef::ArgDef& arg : fdef.signature().output_arg()) {
+      const auto& iter = fdef.ret().find(arg.name());
+      if (iter != fdef.ret().end()) {
+        const auto& v = a_edges.insert({iter->second, arg.name()});
+        ASSERT_TRUE(v.second) << "Duplicate edge " << iter->second << " -> "
+                              << arg.name() << ". fdef: " << fdef.DebugString();
+      } else {
+        const auto& v = a_edges.insert({arg.name(), arg.name()});
+        ASSERT_TRUE(v.second) << "Duplicate edge " << arg.name() << " -> "
+                              << arg.name() << ". fdef: " << fdef.DebugString();
+      }
+    }
+
+    // Verify edges
+    for (const EdgeSpec& e : e_edges) {
+      ASSERT_TRUE(a_edges.find(e) != a_edges.end())
+          << "Failed to find expected edge " << e.ToString()
+          << " in fdef: " << fdef.DebugString();
+    }
+
+    // If caller specified all edges, check that we have seen all
+    if (is_exact_edges) {
+      ASSERT_EQ(e_edges.size() + c_edges.size(), a_edges.size())
+          << "Expected edges: " << ToString(e_edges)
+          << " Expected Control edges: " << ToString(c_edges)
+          << " Actual edges: " << ToString(a_edges)
+          << " in fdef: " << fdef.DebugString();
+    }
+  }
+
+  void VerifyFDef(const std::unordered_set& nodes,
+                  const std::vector& inputs,
+                  const std::vector& outputs,
+                  const std::vector& e_edges,  // expected edges
+                  const std::vector& c_edges,  // expected ctrl edges
+                  bool is_exact_edges = true) {
+    tensorflow::FunctionDef fdef;
+    ASSERT_TRUE(GetFunctionDef(func_, &fdef));
+    VerifyFDefNodes(fdef, nodes);
+    VerifyFDefInputs(fdef, inputs);
+    VerifyFDefOutputs(fdef, outputs);
+    VerifyFDefEdges(fdef, e_edges, c_edges, is_exact_edges);
+  }
+
+  const char* func_name_ = "MyFunc";
+  const char* func_node_name_ = "MyFunc_0";
+  TF_Status* s_;
+  TF_Graph* func_graph_;
+  TF_Graph* host_graph_;
+  TF_Function* func_;
+
+  // Workaround for not being able to initialize empty map using {}
+  std::unordered_set empty_;
+};
+
+TEST_F(CApiFunctionTest, OneOp_ZeroInputs_OneOutput) {
+  /*
+   *                constant
+   *                   |
+   *                   v
+   */
+  // Define
+  TF_Operation* c = ScalarConst(10, func_graph_, s_, "scalar10");
+  Define(-1, {}, {}, {c}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* func_op = Use({});
+  Run({}, func_op, 10);
+  VerifyFDef({"scalar10_0"}, {}, {{"scalar10", DT_INT32}},
+             {{"scalar10_0:output:0", "scalar10"}}, {});
+}
+
+TEST_F(CApiFunctionTest, OneOp_OneInput_OneOutput) {
+  /*
+   *                   |
+   *                   v
+   *                 negate
+   *                   |
+   *                   v
+   */
+  // Define
+  TF_Operation* feed = Placeholder(func_graph_, s_);
+  TF_Operation* neg = Neg(feed, func_graph_, s_);
+  Define(-1, {}, {feed}, {neg}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, func_op, -3);
+  VerifyFDef({"neg_0"}, {{"feed", DT_INT32}}, {{"neg", DT_INT32}},
+             {{"feed", "neg_0:0"}, {"neg_0:y:0", "neg"}}, {});
+}
+
+TEST_F(CApiFunctionTest, ZeroOps_Identity) {
+  /*
+   *                   |
+   *                   |
+   *                   |
+   *                   v
+   */
+  // Define
+  TF_Operation* feed = Placeholder(func_graph_, s_);
+  Define(-1, {}, {feed}, {feed}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, func_op, 3);
+  VerifyFDef(empty_, {{"feed", DT_INT32}}, {{"feed_0", DT_INT32}},
+             {{"feed", "feed_0"}}, {});
+}
+
+TEST_F(CApiFunctionTest, ZeroOps_Permutation) {
+  /*
+   *                   |   |
+   *                   \  /
+   *                    \/
+   *                    x
+   *                   /\
+   *                  /  \
+   *                 |   |
+   *                 v   v
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  Define(-1, {}, {feed1, feed2}, {feed2, feed1}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* two = ScalarConst(2, host_graph_, s_);
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({two, func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, {{func_op, 0}, {func_op, 1}}, {3, 2});
+  VerifyFDef(empty_, M({{"feed1"}, {"feed2"}}), M({{"feed2_0"}, {"feed1_0"}}),
+             {{"feed1", "feed1_0"}, {"feed2", "feed2_0"}}, {});
+}
+
+TEST_F(CApiFunctionTest, OneOp_TwoInputs_OneOutput) {
+  /*
+   *                  |  |
+   *                  v  v
+   *                  add
+   *                   |
+   *                   v
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* add = Add(feed1, feed2, func_graph_, s_);
+  Define(-1, {}, {feed1, feed2}, {add}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* two = ScalarConst(2, host_graph_, s_);
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({two, func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 3);
+  VerifyFDef(
+      {"add_0"}, M({{"feed1"}, {"feed2"}}), M({{"add"}}),
+      {{"feed1", "add_0:0"}, {"feed2", "add_0:1"}, {"add_0:sum:0", "add"}}, {});
+}
+
+TEST_F(CApiFunctionTest, OneOp_TwoInputs_ZeroOutputs) {
+  /*
+   *                  |  |
+   *                  v  v
+   *                  add
+   *
+   *            (output ignored)
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  Add(feed1, feed2, func_graph_, s_);
+  Define(-1, {}, {feed1, feed2}, {}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* two = ScalarConst(2, host_graph_, s_);
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  Use({two, func_feed});
+  VerifyFDef({"add"}, M({{"feed1"}, {"feed2"}}), {},
+             {{"feed1", "add:0"}, {"feed2", "add:1"}}, {});
+}
+
+TEST_F(CApiFunctionTest, TwoOps_ThreeInputs_OneOutput) {
+  /*
+   *                  |  |   |
+   *                  v  v   /
+   *                  add1  /
+   *                   |   |
+   *                   v   v
+   *                   add2
+   *                    |
+   *                    v
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* feed3 = Placeholder(func_graph_, s_, "feed3");
+  TF_Operation* add1 = Add(feed1, feed2, func_graph_, s_, "add1");
+  TF_Operation* add2 = Add(add1, feed3, func_graph_, s_, "add2");
+  Define(-1, {}, {feed1, feed2, feed3}, {add2}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* two = ScalarConst(2, host_graph_, s_, "two");
+  TF_Operation* ten = ScalarConst(10, host_graph_, s_, "ten");
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({two, ten, func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 10 + 3);
+  VerifyFDef({"add1", "add2_0"}, M({{"feed1"}, {"feed2"}, {"feed3"}}),
+             M({{"add2"}}),
+             {{"feed1", "add1:0"},
+              {"feed2", "add1:1"},
+              {"add1:sum:0", "add2_0:0"},
+              {"feed3", "add2_0:1"},
+              {"add2_0:sum:0", "add2"}},
+             {});
+}
+
+TEST_F(CApiFunctionTest, OneOp_TwoInputs_TwoDuplicateOutputs) {
+  /*
+   *                  |  |
+   *                  v  v
+   *                  add
+   *                   |
+   *                 +-+-+
+   *                 |   |
+   *                 v   v
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* add = Add(feed1, feed2, func_graph_, s_);
+  Define(-1, {}, {feed1, feed2}, {add, add}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* two = ScalarConst(2, host_graph_, s_);
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({two, func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, {{func_op, 0}, {func_op, 1}}, {5, 5});
+  VerifyFDef({"add_1"}, M({{"feed1"}, {"feed2"}}), M({{"add"}, {"add_0"}}),
+             {{"feed1", "add_1:0"},
+              {"feed2", "add_1:1"},
+              {"add_1:sum:0", "add"},
+              {"add_1:sum:0", "add_0"}},
+             {});
+}
+
+TEST_F(CApiFunctionTest, TwoOps_ThreeInputs_TwoOutputs) {
+  /*
+   *                  |  |  |
+   *                  v  v  /
+   *                  add  /
+   *                   |  |
+   *                 +-+  |
+   *                 | |  |
+   *                 | v  v
+   *                 | add
+   *                 |  |
+   *                 v  v
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* feed3 = Placeholder(func_graph_, s_, "feed3");
+  TF_Operation* add1 = Add(feed1, feed2, func_graph_, s_, "add1");
+  TF_Operation* add2 = Add(add1, feed3, func_graph_, s_, "add2");
+  Define(-1, {}, {feed1, feed2, feed3}, {add1, add2}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* two = ScalarConst(2, host_graph_, s_, "two");
+  TF_Operation* ten = ScalarConst(10, host_graph_, s_, "ten");
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({two, ten, func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, {{func_op, 0}, {func_op, 1}}, {12, 15});
+  VerifyFDef({"add1_0", "add2_0"}, M({{"feed1"}, {"feed2"}, {"feed3"}}),
+             M({{"add1"}, {"add2"}}),
+             {{"feed1", "add1_0:0"},
+              {"feed2", "add1_0:1"},
+              {"add1_0:sum:0", "add2_0:0"},
+              {"feed3", "add2_0:1"},
+              {"add1_0:sum:0", "add1"},
+              {"add2_0:sum:0", "add2"}},
+             {});
+}
+
+TEST_F(CApiFunctionTest, FromSubsetOfOps) {
+  /*
+   *                  |  |  |
+   *                  v  v  /
+   *                  add  /
+   *                   |  |
+   *               +---+--+---+
+   *  Ops used     |   |  |   |
+   *  for func     |   v  v   |
+   *     |         |   add    |
+   *     +-------> |    |     |
+   *               |    v     |
+   *               |          |
+   *               +----------+
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* feed3 = Placeholder(func_graph_, s_, "feed3");
+  TF_Operation* add1 = Add(feed1, feed2, func_graph_, s_, "add1");
+  TF_Operation* add2 = Add(add1, feed3, func_graph_, s_, "add2");
+  Define(1, {add2}, {add1, feed3}, {add2}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* two = ScalarConst(2, host_graph_, s_, "two");
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({two, func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 3);
+  VerifyFDef(
+      {"add2_0"}, M({{"add1"}, {"feed3"}}), M({{"add2"}}),
+      {{"add1", "add2_0:0"}, {"feed3", "add2_0:1"}, {"add2_0:sum:0", "add2"}},
+      {});
+}
+
+TEST_F(CApiFunctionTest, UsingOneOutputOfSplit) {
+  /*
+   *                      feed
+   *                       |
+   *             +---------+---+
+   *             | const0  |   |
+   *             |    |    |   |
+   *             |    v    /   |
+   *             |    split    |
+   *             |   |  |  |   |
+   *             |   v  |  v   |
+   *             |      |      |
+   *             +------+------+
+   *                    |
+   *                    v
+   *
+   *  Only the second output from split is used as function output
+   */
+  // Define
+  TF_Operation* feed = Placeholder(func_graph_, s_);
+  TF_Operation* split = Split3(feed, func_graph_, s_);
+  DefineT(-1, {}, {{feed, 0}}, {{split, 1}}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({func_feed});
+  RunT({{func_feed, Int32Tensor({1, 2, 3, 4, 5, 6})}}, {{func_op, 0}},
+       {{3, 4}});
+  VerifyFDef({"split3_const0", "split3_0"}, M({{"feed"}}), M({{"split3"}}),
+             {{"split3_const0:output:0", "split3_0:0"},
+              {"feed", "split3_0:1"},
+              {"split3_0:output:1", "split3"}},
+             {});
+}
+
+TEST_F(CApiFunctionTest, UsingTwoOutputsOfSplit) {
+  /*
+   *                      feed
+   *                       |
+   *             +---------+---+
+   *             | const0  |   |
+   *             |    |    |   |
+   *             |    v    /   |
+   *             |    split    |
+   *             |   |  |  |   |
+   *             |   |  v  |   |
+   *             |   |     |   |
+   *             +---+-----+---+
+   *                 |     |
+   *                 v     v
+   *
+   *  Second output from split is not used as function output
+   */
+  // Define
+  TF_Operation* feed = Placeholder(func_graph_, s_);
+  TF_Operation* split = Split3(feed, func_graph_, s_);
+  DefineT(-1, {}, {{feed, 0}}, {{split, 0}, {split, 2}}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({func_feed});
+  RunT({{func_feed, Int32Tensor({1, 2, 3, 4, 5, 6})}},
+       {{func_op, 0}, {func_op, 1}}, {{1, 2}, {5, 6}});
+  VerifyFDef({"split3_const0", "split3_1"}, M({{"feed"}}),
+             M({{"split3"}, {"split3_0"}}),
+             {{"split3_const0:output:0", "split3_1:0"},
+              {"feed", "split3_1:1"},
+              {"split3_1:output:0", "split3"},
+              {"split3_1:output:2", "split3_0"}},
+             {});
+}
+
+TEST_F(CApiFunctionTest, UsingTwoOutputsOfSplitAsInputs) {
+  /*
+   *                    |
+   *                    v
+   *                  split
+   *                 |  |  |
+   *                 |  v  |
+   *                 |     |
+   *             +---+-----+---+
+   *             |   |     |   |
+   *             |   v     v   |
+   *             |     add     |
+   *             |      |      |
+   *             |      |      |
+   *             +------+------+
+   *                    |
+   *                    v
+   */
+  // Define
+  TF_Operation* feed = Placeholder(func_graph_, s_);
+  TF_Operation* split = Split3(feed, func_graph_, s_);
+  TF_Operation* add = Add({split, 0}, {split, 2}, func_graph_, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  DefineT(1, {add}, {{split, 0}, {split, 2}}, {{add, 0}}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* two = ScalarConst(2, host_graph_, s_, "two");
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({two, func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 3);
+  VerifyFDef(
+      {"add_0"}, M({{"split3"}, {"split3_0"}}), M({{"add"}}),
+      {{"split3", "add_0:0"}, {"split3_0", "add_0:1"}, {"add_0:sum:0", "add"}},
+      {});
+}
+
+TEST_F(CApiFunctionTest, NodesUsedInInputsMustHaveSingleOutput) {
+  /*
+   *                    |
+   *                    v
+   *                  split
+   *                 |  |  |
+   *                 |  v  |
+   *                 |     |
+   *       input --->|     |<--- input
+   *                 |     |
+   *                 v     v
+   *                   add
+   *                    |
+   *                    |
+   *                    v
+   */
+  // Define
+  TF_Tensor* tensor_123 = Int32Tensor({1, 2, 3});
+  TF_Operation* c = Const(tensor_123, func_graph_, s_, "const_array");
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  TF_Operation* split = Split3(c, func_graph_, s_);
+  TF_Operation* add = Add({split, 0}, {split, 2}, func_graph_, s_);
+  ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  DefineT(-1, {}, {{split, 0}, {split, 2}}, {{add, 0}}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(string("When `num_opers` is set to -1, nodes referenced in "
+                   "`inputs` must have a single output. Node split3 has "
+                   "3 outputs. Encountered while creating function 'MyFunc'"),
+            string(TF_Message(s_)));
+
+  TF_DeleteTensor(tensor_123);
+}
+
+TEST_F(CApiFunctionTest, FunctionWithWhileLoop) {
+  // Inputs to the while loop and the function as a whole
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+
+  // Outputs of the while loop corresponding to the two inputs above
+  // The first one will the function's output
+  std::vector outputs;
+
+  // Add while loop to func_graph_
+  {
+    // The inputs to the while loop
+    std::vector inputs = {{feed1, 0}, {feed2, 0}};
+    std::unique_ptr params(new TF_WhileParams(
+        TF_NewWhile(func_graph_, &inputs[0], inputs.size(), s_)));
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+    params->name = "test_loop";
+
+    // Initialize outputs so we can easily detect errors/bugs
+    outputs.resize(2, {nullptr, -1});
+
+    // Create loop: while (input1 < input2) input1 += input2 + 1
+    TF_Operation* less_than = LessThan(
+        params->cond_inputs[0], params->cond_inputs[1], params->cond_graph, s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+    params->cond_output = {less_than, 0};
+
+    TF_Operation* add1 = Add(params->body_inputs[0], params->body_inputs[1],
+                             params->body_graph, s_, "add1");
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+    TF_Operation* one = ScalarConst(1, params->body_graph, s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+    TF_Operation* add2 = Add(add1, one, params->body_graph, s_, "add2");
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+    params->body_outputs[0] = {add2, 0};
+    params->body_outputs[1] = params->body_inputs[1];
+
+    // Finalize while loop
+    TF_FinishWhile(params.get(), s_, &outputs[0]);
+    EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  }
+
+  // Define function, use it in graph, and run
+  DefineT(-1, {}, {{feed1, 0}, {feed2, 0}}, {outputs[0]}, nullptr);
+  TF_Operation* five = ScalarConst(5, host_graph_, s_, "five");
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({func_feed, five});
+  Run({{func_feed, Int32Tensor(2)}}, func_op, 2 /*+=*/ + 5 + 1);
+
+  // Verify input, output, and subset of edges in fdef.
+  // The subset of edges we verify is a chain between feed1 and output to
+  // make sure that the correct output is picked.
+  tensorflow::FunctionDef fdef;
+  ASSERT_TRUE(GetFunctionDef(func_, &fdef));
+  VerifyFDefInputs(fdef, M({{"feed1"}, {"feed2"}}));
+  VerifyFDefOutputs(fdef, M({{"test_loop_exit"}}));
+  VerifyFDefEdges(fdef,
+                  {{"feed1", "test_loop/Enter:0"},
+                   {"test_loop/Enter:output:0", "test_loop/Merge:0"},
+                   {"test_loop/Merge:output:0", "test_loop/Switch:0"},
+                   {"test_loop/Switch:output_false:0", "test_loop/Exit:0"},
+                   {"test_loop/Exit:output:0", "test_loop_exit"}},
+                  {}, false);
+}
+
+TEST_F(CApiFunctionTest, ControlDependency) {
+  /*
+   *                  |  |    scalar
+   *                  |  |    .
+   *                  v  v   . <---- control dependency
+   *                  add < -
+   *                   |
+   *                   v
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* five = ScalarConst(5, func_graph_, s_);
+  TF_Operation* add =
+      AddWithCtrlDependency(feed1, feed2, func_graph_, five, s_);
+  EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  Define(-1, {}, {feed1, feed2}, {add}, nullptr);
+
+  // Use, run, and verify
+  TF_Operation* two = ScalarConst(2, host_graph_, s_);
+  TF_Operation* func_feed = Placeholder(host_graph_, s_);
+  TF_Operation* func_op = Use({two, func_feed});
+  Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 3);
+  VerifyFDef(
+      {"add_0", "scalar"}, M({{"feed1"}, {"feed2"}}), M({{"add"}}),
+      {{"feed1", "add_0:0"}, {"feed2", "add_0:1"}, {"add_0:sum:0", "add"}},
+      {{"scalar", "add_0"}});
+}
+
+TEST_F(CApiFunctionTest, ControlDependencyOutsideOfBody) {
+  /*
+   *                  |  |    scalar
+   *                  |  |    .
+   *                  v  v   . <---- control dependency
+   *                  add < -
+   *                   |
+   *                   v
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* five = ScalarConst(5, func_graph_, s_);
+  TF_Operation* add =
+      AddWithCtrlDependency(feed1, feed2, func_graph_, five, s_);
+  EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  Define(1, {add}, {feed1, feed2}, {add}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(string("The source of control edge [id=3 scalar:-1 -> add:-1] "
+                   "is not in the body. Encountered while creating "
+                   "function 'MyFunc'"),
+            string(TF_Message(s_)));
+}
+
+TEST_F(CApiFunctionTest, ControlDependencyOutsideOfBody_FromInputNode) {
+  /*
+   *                  |  |.
+   *                  |  |  .
+   *                  |  |   .
+   *                  v  v   . <---- control dependency
+   *                  add < -
+   *                   |
+   *                   v
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* add =
+      AddWithCtrlDependency(feed1, feed2, func_graph_, feed1, s_);
+  EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+  Define(-1, {}, {feed1, feed2}, {add}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(string("The source of control edge [id=3 feed1:-1 -> add:-1] "
+                   "is not in the body. Encountered while creating "
+                   "function 'MyFunc'"),
+            string(TF_Message(s_)));
+}
+
+TEST_F(CApiFunctionTest, DuplicateInputsAreNotAllowed) {
+  /*
+   *                  feed
+   *                   |
+   *                  +++
+   *                  | |
+   *              +---+-+---+
+   *              |   | |   |
+   *              |   v v   |
+   *              |   add   |
+   *              |    |    |
+   *              |    |    |
+   *              +----+----+
+   *                   |
+   *                   v
+   */
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* add = Add(feed1, feed1, func_graph_, s_);
+  Define(-1, {}, {feed1, feed1}, {add}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(
+      string("TF_Output feed1:0 appears more than once in the input list"),
+      string(TF_Message(s_)));
+}
+
+TEST_F(CApiFunctionTest, InvalidInputTensor_HighIndex) {
+  /*
+   *                  |  |
+   *                  v  v
+   *                  add
+   *                   |
+   *                   v
+   */
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* add = Add(feed1, feed2, func_graph_, s_);
+  DefineT(-1, {}, {{feed1, 0}, {feed2, 2}}, {{add, 0}}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(string("Node 'feed2' (type: 'Placeholder', num of outputs: 1) does "
+                   "not have output 2\n\tEncountered while processing "
+                   "input 1 into function 'MyFunc'"),
+            string(TF_Message(s_)));
+}
+
+TEST_F(CApiFunctionTest, InvalidInputTensor_BadNodePtr) {
+  /*
+   *                  |  |
+   *                  v  v
+   *                  add
+   *                   |
+   *                   v
+   */
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* add = Add(feed1, feed2, func_graph_, s_);
+  DefineT(-1, {}, {{feed1, 0}, {nullptr, 0}}, {{add, 0}}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(string("Node is null\n\tEncountered while processing input 1 "
+                   "into function 'MyFunc'"),
+            string(TF_Message(s_)));
+}
+
+TEST_F(CApiFunctionTest, InvalidOutputTensor_HighIndex) {
+  /*
+   *                  |  |
+   *                  v  v
+   *                  add
+   *                   |
+   *                   v
+   */
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* add = Add(feed1, feed2, func_graph_, s_);
+  DefineT(-1, {}, {{feed1, 0}, {feed2, 0}}, {{add, 3}}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(string("Node 'add' (type: 'AddN', num of outputs: 1) does "
+                   "not have output 3\n\tEncountered while processing "
+                   "output 0 from function 'MyFunc'"),
+            string(TF_Message(s_)));
+}
+
+TEST_F(CApiFunctionTest, InvalidOutputTensor_BadNodePtr) {
+  /*
+   *                  |  |
+   *                  v  v
+   *                  add
+   *                   |
+   *                   v
+   */
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  Add(feed1, feed2, func_graph_, s_);
+  DefineT(-1, {}, {{feed1, 0}, {feed2, 0}}, {{nullptr, 3}}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(string("Node is null\n\tEncountered while processing output 0 "
+                   "from function 'MyFunc'"),
+            string(TF_Message(s_)));
+}
+
+TEST_F(CApiFunctionTest, NodeMissingInput) {
+  /*
+   *        input---> |  | <----missing input
+   *                  v  v
+   *        body----> add
+   *                   |
+   *                   v
+   */
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* add = Add(feed1, feed2, func_graph_, s_);
+  DefineT(1, {add}, {{feed1, 0}}, {{add, 0}}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(string("Input 1, 'feed2:0', of node 'add' in function 'MyFunc' "
+                   "is not available. You might need to include it in inputs "
+                   "or include its source node in the body"),
+            string(TF_Message(s_)));
+}
+
+TEST_F(CApiFunctionTest, OutputOpNotInBody) {
+  /*
+   *                  |  |
+   *                  v  v
+   *                  add    scalar    (scalar not included in body)
+   *                   |       |
+   *                   v       v       (function has two outputs)
+   */
+  // Define
+  TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1");
+  TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2");
+  TF_Operation* scalar = ScalarConst(2, func_graph_, s_);
+  TF_Operation* add = Add(feed1, feed2, func_graph_, s_);
+  Define(1, {add}, {feed1, feed2}, {add, scalar}, nullptr, true);
+  EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_));
+  EXPECT_EQ(string("TF_Output scalar:0 is neither in the function body nor "
+                   "among function inputs. Encountered while creating "
+                   "function 'MyFunc'"),
+            string(TF_Message(s_)));
+}
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h
index f7d25dce8f..6e44a72e2b 100644
--- a/tensorflow/c/c_api_internal.h
+++ b/tensorflow/c/c_api_internal.h
@@ -130,6 +130,11 @@ struct TF_DeviceList {
   std::vector response;
 };
 
+struct TF_Function {
+  // Currently contains a single function and no gradients
+  tensorflow::FunctionDefLibrary fdef_lib;
+};
+
 namespace tensorflow {
 
 class TensorCApi {
@@ -142,6 +147,9 @@ class TensorCApi {
 };
 
 TF_Tensor* TF_TensorFromTensor(const Tensor& src, TF_Status* status);
+
+Status MessageToBuffer(const tensorflow::protobuf::Message& in, TF_Buffer* out);
+
 }  // end namespace tensorflow
 
 #endif  // TENSORFLOW_C_C_API_INTERNAL_H_
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index 0aa60fb45d..c442029009 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -829,7 +829,7 @@ TEST(CAPI, ShapeInferenceError) {
   TF_Operation* vec3 = Const(vec3_tensor.get(), graph, status, "vec3");
   ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
 
-  TF_Operation* add = Add(vec2, vec3, graph, status);
+  TF_Operation* add = AddNoCheck(vec2, vec3, graph, status);
   ASSERT_NE(TF_OK, TF_GetCode(status));
   ASSERT_TRUE(add == nullptr);
 
diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc
index 21603c1a07..9cd978c97e 100644
--- a/tensorflow/c/c_test_util.cc
+++ b/tensorflow/c/c_test_util.cc
@@ -15,7 +15,9 @@ limitations under the License.
 
 #include "tensorflow/c/c_test_util.h"
 
+#include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
 
 using tensorflow::GraphDef;
@@ -36,6 +38,23 @@ TF_Tensor* Int8Tensor(const int64_t* dims, int num_dims, const char* values) {
   return t;
 }
 
+TF_Tensor* Int32Tensor(const int64_t* dims, int num_dims,
+                       const int32_t* values) {
+  int64_t num_values = 1;
+  for (int i = 0; i < num_dims; ++i) {
+    num_values *= dims[i];
+  }
+  TF_Tensor* t =
+      TF_AllocateTensor(TF_INT32, dims, num_dims, sizeof(int32_t) * num_values);
+  memcpy(TF_TensorData(t), values, sizeof(int32_t) * num_values);
+  return t;
+}
+
+TF_Tensor* Int32Tensor(const std::vector& values) {
+  int64_t dims = values.size();
+  return Int32Tensor(&dims, 1, values.data());
+}
+
 TF_Tensor* Int32Tensor(int32_t v) {
   const int num_bytes = sizeof(int32_t);
   int32_t* values = new int32_t[1];
@@ -44,19 +63,40 @@ TF_Tensor* Int32Tensor(int32_t v) {
                       &Int32Deallocator, nullptr);
 }
 
-TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name) {
+// All the *Helper methods are used as a workaround for the restrictions that
+// one cannot call ASSERT_* methods in non-void-returning functions (when
+// exceptions are disabled during compilation)
+void PlaceholderHelper(TF_Graph* graph, TF_Status* s, const char* name,
+                       TF_Operation** op) {
   TF_OperationDescription* desc = TF_NewOperation(graph, "Placeholder", name);
   TF_SetAttrType(desc, "dtype", TF_INT32);
-  return TF_FinishOperation(desc, s);
+  *op = TF_FinishOperation(desc, s);
+  ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  ASSERT_NE(*op, nullptr);
 }
 
-TF_Operation* Const(TF_Tensor* t, TF_Graph* graph, TF_Status* s,
-                    const char* name) {
+TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name) {
+  TF_Operation* op;
+  PlaceholderHelper(graph, s, name, &op);
+  return op;
+}
+
+void ConstHelper(TF_Tensor* t, TF_Graph* graph, TF_Status* s, const char* name,
+                 TF_Operation** op) {
   TF_OperationDescription* desc = TF_NewOperation(graph, "Const", name);
   TF_SetAttrTensor(desc, "value", t, s);
-  if (TF_GetCode(s) != TF_OK) return nullptr;
+  ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
   TF_SetAttrType(desc, "dtype", TF_TensorType(t));
-  return TF_FinishOperation(desc, s);
+  *op = TF_FinishOperation(desc, s);
+  ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  ASSERT_NE(*op, nullptr);
+}
+
+TF_Operation* Const(TF_Tensor* t, TF_Graph* graph, TF_Status* s,
+                    const char* name) {
+  TF_Operation* op;
+  ConstHelper(t, graph, s, name, &op);
+  return op;
 }
 
 TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s,
@@ -65,11 +105,39 @@ TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s,
   return Const(tensor.get(), graph, s, name);
 }
 
+void AddHelper(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s,
+               const char* name, TF_Operation** op, bool check) {
+  TF_OperationDescription* desc = TF_NewOperation(graph, "AddN", name);
+  TF_Output add_inputs[2] = {{l, 0}, {r, 0}};
+  TF_AddInputList(desc, add_inputs, 2);
+  *op = TF_FinishOperation(desc, s);
+  if (check) {
+    ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+    ASSERT_NE(*op, nullptr);
+  }
+}
+
 TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph,
                   TF_Status* s, const char* name) {
+  TF_Operation* op;
+  AddHelper(l, r, graph, s, name, &op, true);
+  return op;
+}
+
+TF_Operation* AddNoCheck(TF_Operation* l, TF_Operation* r, TF_Graph* graph,
+                         TF_Status* s, const char* name) {
+  TF_Operation* op;
+  AddHelper(l, r, graph, s, name, &op, false);
+  return op;
+}
+
+TF_Operation* AddWithCtrlDependency(TF_Operation* l, TF_Operation* r,
+                                    TF_Graph* graph, TF_Operation* ctrl_op,
+                                    TF_Status* s, const char* name) {
   TF_OperationDescription* desc = TF_NewOperation(graph, "AddN", name);
   TF_Output add_inputs[2] = {{l, 0}, {r, 0}};
   TF_AddInputList(desc, add_inputs, 2);
+  TF_AddControlInput(desc, ctrl_op);
   return TF_FinishOperation(desc, s);
 }
 
@@ -81,11 +149,20 @@ TF_Operation* Add(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s,
   return TF_FinishOperation(desc, s);
 }
 
-TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s) {
+void NegHelper(TF_Operation* n, TF_Graph* graph, TF_Status* s,
+               TF_Operation** op) {
   TF_OperationDescription* desc = TF_NewOperation(graph, "Neg", "neg");
   TF_Output neg_input = {n, 0};
   TF_AddInput(desc, neg_input);
-  return TF_FinishOperation(desc, s);
+  *op = TF_FinishOperation(desc, s);
+  ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  ASSERT_NE(*op, nullptr);
+}
+
+TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s) {
+  TF_Operation* op;
+  NegHelper(n, graph, s, &op);
+  return op;
 }
 
 TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph,
@@ -96,6 +173,32 @@ TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph,
   return TF_FinishOperation(desc, s);
 }
 
+void Split3Helper(TF_Operation* input, TF_Graph* graph, TF_Status* s,
+                  const char* name, TF_Operation** op) {
+  TF_Operation* zero = ScalarConst(
+      0, graph, s, ::tensorflow::strings::StrCat(name, "_const0").c_str());
+  TF_OperationDescription* desc = TF_NewOperation(graph, "Split", name);
+  TF_AddInput(desc, {zero, 0});
+  TF_AddInput(desc, {input, 0});
+  TF_SetAttrInt(desc, "num_split", 3);
+  TF_SetAttrType(desc, "T", TF_INT32);
+  // Set device to CPU since there is no version of split for int32 on GPU
+  // TODO(iga): Convert all these helpers and tests to use floats because
+  // they are usually available on GPUs. After doing this, remove TF_SetDevice
+  // call in c_api_function_test.cc
+  TF_SetDevice(desc, "/cpu:0");
+  *op = TF_FinishOperation(desc, s);
+  ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  ASSERT_NE(*op, nullptr);
+}
+
+TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s,
+                     const char* name) {
+  TF_Operation* op;
+  Split3Helper(input, graph, s, name, &op);
+  return op;
+}
+
 bool IsPlaceholder(const tensorflow::NodeDef& node_def) {
   if (node_def.op() != "Placeholder" || node_def.name() != "feed") {
     return false;
@@ -196,6 +299,18 @@ bool GetNodeDef(TF_Operation* oper, tensorflow::NodeDef* node_def) {
   return ret;
 }
 
+bool GetFunctionDef(TF_Function* func, tensorflow::FunctionDef* func_def) {
+  TF_Status* s = TF_NewStatus();
+  TF_Buffer* buffer = TF_NewBuffer();
+  TF_FunctionToFunctionDef(func, buffer, s);
+  bool ret = TF_GetCode(s) == TF_OK;
+  EXPECT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+  if (ret) ret = func_def->ParseFromArray(buffer->data, buffer->length);
+  TF_DeleteBuffer(buffer);
+  TF_DeleteStatus(s);
+  return ret;
+}
+
 bool GetAttrValue(TF_Operation* oper, const char* attr_name,
                   tensorflow::AttrValue* attr_value, TF_Status* s) {
   TF_Buffer* buffer = TF_NewBuffer();
diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h
index 0c0ba667bd..a927739d46 100644
--- a/tensorflow/c/c_test_util.h
+++ b/tensorflow/c/c_test_util.h
@@ -33,6 +33,13 @@ typedef std::unique_ptr
 // Create a tensor with values of type TF_INT8 provided by `values`.
 TF_Tensor* Int8Tensor(const int64_t* dims, int num_dims, const char* values);
 
+// Create a tensor with values of type TF_INT32 provided by `values`.
+TF_Tensor* Int32Tensor(const int64_t* dims, int num_dims,
+                       const int32_t* values);
+
+// Create 1 dimensional tensor with values from `values`
+TF_Tensor* Int32Tensor(const std::vector& values);
+
 TF_Tensor* Int32Tensor(int32_t v);
 
 TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s,
@@ -47,6 +54,13 @@ TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s,
 TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph,
                   TF_Status* s, const char* name = "add");
 
+TF_Operation* AddNoCheck(TF_Operation* l, TF_Operation* r, TF_Graph* graph,
+                         TF_Status* s, const char* name = "add");
+
+TF_Operation* AddWithCtrlDependency(TF_Operation* l, TF_Operation* r,
+                                    TF_Graph* graph, TF_Operation* ctrl_op,
+                                    TF_Status* s, const char* name = "add");
+
 TF_Operation* Add(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s,
                   const char* name = "add");
 
@@ -54,6 +68,10 @@ TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s);
 
 TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s);
 
+// Split `input` along the first dimention into 3 tensors
+TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s,
+                     const char* name = "split3");
+
 bool IsPlaceholder(const tensorflow::NodeDef& node_def);
 
 bool IsScalarConst(const tensorflow::NodeDef& node_def, int v);
@@ -66,6 +84,8 @@ bool GetGraphDef(TF_Graph* graph, tensorflow::GraphDef* graph_def);
 
 bool GetNodeDef(TF_Operation* oper, tensorflow::NodeDef* node_def);
 
+bool GetFunctionDef(TF_Function* func, tensorflow::FunctionDef* func_def);
+
 bool GetAttrValue(TF_Operation* oper, const char* attr_name,
                   tensorflow::AttrValue* attr_value, TF_Status* s);
 
diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake
index 87d946c346..c5a1018127 100644
--- a/tensorflow/contrib/cmake/tf_c.cmake
+++ b/tensorflow/contrib/cmake/tf_c.cmake
@@ -18,6 +18,7 @@
 set(tf_c_srcs
     "${tensorflow_source_dir}/tensorflow/c/c_api.cc"
     "${tensorflow_source_dir}/tensorflow/c/c_api.h"
+    "${tensorflow_source_dir}/tensorflow/c/c_api_function.cc"
     "${tensorflow_source_dir}/tensorflow/c/eager/c_api.cc"
     "${tensorflow_source_dir}/tensorflow/c/eager/c_api.h"
     "${tensorflow_source_dir}/tensorflow/c/eager/runtime.cc"
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 7d938365c5..a274c79970 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -523,6 +523,17 @@ Status Graph::IsValidNode(const Node* node) const {
   return Status::OK();
 }
 
+Status Graph::IsValidOutputTensor(const Node* node, int idx) const {
+  TF_RETURN_IF_ERROR(IsValidNode(node));
+  if (idx >= node->num_outputs()) {
+    return errors::InvalidArgument("Node '", node->name(), "' (type: '",
+                                   node->op_def().name(),
+                                   "', num of outputs: ", node->num_outputs(),
+                                   ") does not have ", "output ", idx);
+  }
+  return Status::OK();
+}
+
 Node* Graph::AllocateNode(std::shared_ptr props,
                           const Node* cost_node) {
   Node* node = nullptr;
@@ -572,7 +583,7 @@ int Graph::InternDeviceName(const string& device_name) {
 }
 
 string Edge::DebugString() const {
-  return strings::Printf("Edge %d %s:%d -> %s:%d", id_, src_->name().c_str(),
+  return strings::Printf("[id=%d %s:%d -> %s:%d]", id_, src_->name().c_str(),
                          src_output_, dst_->name().c_str(), dst_input_);
 }
 
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 51ede642d2..25875185e4 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -519,6 +519,10 @@ class Graph {
   // Returns OK if `node` is non-null and belongs to this graph
   Status IsValidNode(const Node* node) const;
 
+  // Returns OK if IsValidNode(`node`) and `idx` is less than
+  // node->num_outputs()
+  Status IsValidOutputTensor(const Node* node, int idx) const;
+
   // TODO(josh11b): uint64 hash() const;
 
  private:
diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i
index 08dd3922db..fa49e66e87 100644
--- a/tensorflow/python/client/tf_session.i
+++ b/tensorflow/python/client/tf_session.i
@@ -373,6 +373,33 @@ def TF_Reset(target, containers=None, config=None):
     TF_DeleteSessionOptions(opts)
 %}
 
+// We use TF_GraphToFunction_wrapper instead of TF_GraphToFunction
+%ignore TF_GraphToFunction;
+// TF_GraphToFunction_wrapper does not use any Python methods and
+// does not require GIL to be held.
+%unignore TF_GraphToFunction_wrapper;
+
+// $input is a Python list of wrapped TF_Operations
+%typemap(in) (const std::vector* opers)
+    (std::vector opers) {
+  if ($input != Py_None) {
+    if (!PyList_Check($input)) {
+      SWIG_exception_fail(SWIG_TypeError, "$symname: expected list");
+    }
+    size_t size = PyList_Size($input);
+    for (int i = 0; i < size; ++i) {
+      PyObject* item = PyList_GetItem($input, i);
+      TF_Operation* oper_ptr;
+      SWIG_ConvertPtr(item, reinterpret_cast(&oper_ptr),
+                      $descriptor(TF_Operation*), 0);
+      opers.push_back(oper_ptr);
+    }
+    $1 = &opers;
+  } else {
+    $1 = nullptr;
+  }
+}
+
 %include "tensorflow/python/client/tf_session_helper.h"
 
 %unignoreall
diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc
index 60a589fa8b..72f560fa87 100644
--- a/tensorflow/python/client/tf_session_helper.cc
+++ b/tensorflow/python/client/tf_session_helper.cc
@@ -337,4 +337,38 @@ std::vector TF_OperationGetControlInputs_wrapper(
   return control_inputs;
 }
 
+TF_Function* TF_GraphToFunction_wrapper(const TF_Graph* fn_body,
+                                        const char* fn_name,
+                                        const std::vector* opers,
+                                        const std::vector& inputs,
+                                        const std::vector& outputs,
+                                        const NameVector& output_names,
+                                        const TF_FunctionOptions* opts,
+                                        TF_Status* out_status) {
+  if (!output_names.empty() && output_names.size() != outputs.size()) {
+    Set_TF_Status_from_Status(
+        out_status,
+        errors::InvalidArgument(
+            "output names must be either empty or equal in size to outputs. ",
+            "output names size = ", output_names.size(),
+            " outputs size = ", outputs.size()));
+    return nullptr;
+  }
+
+  int nopers = -1;
+  const TF_Operation* const* opers_array = nullptr;
+  if (opers != nullptr) {
+    nopers = opers->size();
+    opers_array = opers->data();
+  }
+
+  const char** output_names_ptr =
+      output_names.empty() ? nullptr
+                           : const_cast(output_names.data());
+
+  return TF_GraphToFunction(fn_body, fn_name, nopers, opers_array,
+                            inputs.size(), inputs.data(), outputs.size(),
+                            outputs.data(), output_names_ptr, opts, out_status);
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h
index 3bc63f822f..8fae6206c0 100644
--- a/tensorflow/python/client/tf_session_helper.h
+++ b/tensorflow/python/client/tf_session_helper.h
@@ -148,6 +148,16 @@ void TF_SessionPRun_wrapper(TF_Session* session, const char* handle,
 std::vector TF_OperationGetControlInputs_wrapper(
     TF_Operation* oper);
 
+// `opers` equaling NULL are converted to `nopers = -1`.
+// `output_names` must be empty or have the same length as `outputs`.
+TF_Function* TF_GraphToFunction_wrapper(const TF_Graph* fn_body,
+                                        const char* fn_name,
+                                        const std::vector* opers,
+                                        const std::vector& inputs,
+                                        const std::vector& outputs,
+                                        const NameVector& output_names,
+                                        const TF_FunctionOptions* opts,
+                                        TF_Status* out_status);
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 2f35f0e04b..7a866ee6e8 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -26,7 +26,9 @@ import hashlib
 
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.core.framework import op_def_pb2
+from tensorflow.python import pywrap_tensorflow as c_api
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import graph_to_function_def
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
@@ -290,6 +292,7 @@ class _DefinedFunction(object):
     self._shape_func = shape_func
     self._extra_kwargs = kwargs
     self._definition = None  # Constructed lazily.
+    self._c_func = None  # Constructed with definition.
     self._sub_functions = dict()  # Constructed with definition.
 
     self._args = []
@@ -396,6 +399,22 @@ class _DefinedFunction(object):
     if self._func.__doc__:
       self._definition.signature.description = self._func.__doc__
 
+    # pylint: disable=protected-access
+    if temp_graph._c_graph:
+      with errors.raise_exception_on_not_ok_status() as status:
+        output_names = ([compat.as_bytes(x) for x in self._out_names]
+                        if self._out_names else [])
+        self._c_func = c_api.TF_GraphToFunction_wrapper(
+            temp_graph._c_graph,
+            self._func_name,
+            None,  # opers
+            [t._as_tf_output() for t in inputs],
+            [t._as_tf_output() for t in outputs],
+            output_names,
+            None,  # opts
+            status)
+    # pylint: enable=protected-access
+
   def _create_hash_str(self, input_arg, output_arg, node_def):
     """Creates an 8-character string unique to this input.
 
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 589db9ef4d..40205ddf05 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.framework import function
 from tensorflow.python.framework import graph_to_function_def
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import control_flow_ops
@@ -63,7 +64,51 @@ def _OptimizerOptions():
                 do_constant_folding=cfold)))
 
 
-class FunctionTest(test.TestCase):
+class FunctionTestMethods(object):
+  """Test methods for verifying Function support.
+
+  These test methods are used as mix-ins in two test cases: with
+  and without C API support.
+  """
+
+  def testIdentity(self):
+
+    @function.Defun(dtypes.float32, func_name="MyIdentity")
+    def MyIdentityFunc(a):
+      return a
+
+    with ops.Graph().as_default():
+      call = MyIdentityFunc([18.0])
+      self.assertEqual("MyIdentity", call.op.name)
+      with session.Session() as sess:
+        self.assertAllEqual([18.0], sess.run(call))
+
+  def testIdentityOutputName(self):
+
+    @function.Defun(
+        dtypes.float32, func_name="MyIdentity", out_names=["my_result_name"])
+    def MyIdentityFunc(a):
+      return a
+
+    with ops.Graph().as_default():
+      call = MyIdentityFunc([18.0])
+      self.assertEqual("MyIdentity", call.op.name)
+      with session.Session() as sess:
+        self.assertAllEqual([18.0], sess.run(call))
+
+  def testTooManyOutputNames(self):
+
+    @function.Defun(
+        dtypes.float32, func_name="MyIdentity",
+        out_names=["my_result1", "my_result2"])
+    def MyIdentityFunc(a):
+      return a
+
+    with ops.Graph().as_default():
+      with self.assertRaisesRegexp(
+          ValueError, (r"Length of out_names \(2\) does not match number of "
+                       r"outputs \(1\): my_result1, my_result2")):
+        MyIdentityFunc([18.0])
 
   def testDefineFunction2Args(self):
 
@@ -77,6 +122,35 @@ class FunctionTest(test.TestCase):
       with session.Session() as sess:
         self.assertAllEqual([5.0], sess.run(call))
 
+  def testValueErrorOnFunctionWithNoOutput(self):
+    # TODO(iga): Remove this restriction and this test
+
+    @function.Defun(dtypes.float32, dtypes.float32)
+    def APlus2B(a, b):
+      print(a + b * 2)  # Create some ops to have nodes in the body
+                        # Using 'print' to make lint happy
+
+    with ops.Graph().as_default():
+      with self.assertRaisesRegexp(ValueError,
+                                   "Function can not return None"):
+        APlus2B([1.0], [2.0])
+
+  def testDefineFunction2ArgsOutputName(self):
+
+    @function.Defun(
+        dtypes.float32,
+        dtypes.float32,
+        func_name="APlus2B",
+        out_names=["my_result_name"])
+    def APlus2B(a, b):
+      return a + b * 2
+
+    with ops.Graph().as_default():
+      call = APlus2B([1.0], [2.0])
+      self.assertEqual("APlus2B", call.op.name)
+      with session.Session() as sess:
+        self.assertAllEqual([5.0], sess.run(call))
+
   def testDefineFunctionDuplicateOutputs(self):
 
     @function.Defun(dtypes.float32, func_name="Duplicate")
@@ -137,6 +211,7 @@ class FunctionTest(test.TestCase):
       out, = sess.run(dx, feed)
     self.assertAllClose(1 - np.square(np.tanh(inp)), out)
 
+  @test_util.disable_c_api   # Function gradients don't work with C API
   def testCustomGradient(self):
     dtype = dtypes.float32
 
@@ -169,6 +244,7 @@ class FunctionTest(test.TestCase):
         out, = sess.run(dlogits, {logits: x, labels: y})
       self.assertAllClose(out, np.exp(prob - y))
 
+  @test_util.disable_c_api   # Function gradients don't work with C API
   def testCustomGradientError(self):
     dtype = dtypes.float32
 
@@ -194,6 +270,7 @@ class FunctionTest(test.TestCase):
           "SymGrad expects to return 1.*but get 2.*instead"):
         _ = sess.run(dinp, {inp: x})
 
+  @test_util.disable_c_api   # Function gradients don't work with C API
   def testSymGradShape(self):
     g = ops.Graph()
     with g.as_default():
@@ -209,6 +286,7 @@ class FunctionTest(test.TestCase):
       self.assertEqual(x.get_shape(), dx.get_shape())
       self.assertEqual(y.get_shape(), dy.get_shape())
 
+  @test_util.disable_c_api   # Function gradients don't work with C API
   def testSymGradAttr(self):
 
     @function.Defun(noinline=True)
@@ -312,6 +390,7 @@ class FunctionTest(test.TestCase):
                                    "assertion failed.*-3"):
         self.assertAllEqual(Foo(constant_op.constant(-3.0)).eval(), 6.0)
 
+  @test_util.disable_c_api   # Op._add_control_inputs doesn't work with C API
   def testAssertWrapper(self):
 
     @function.Defun(dtypes.float32)
@@ -326,6 +405,7 @@ class FunctionTest(test.TestCase):
                                    "assertion"):
         _ = MyFn(100.0).eval()
 
+  @test_util.disable_c_api   # Op._add_control_inputs doesn't work with C API
   def testWhileLoopCallsFunc(self):
     with self.test_session(use_gpu=True) as sess:
 
@@ -345,6 +425,7 @@ class FunctionTest(test.TestCase):
       ans = sess.run(loop)
       self.assertAllClose(ans, 131072.)
 
+  @test_util.disable_c_api   # Op._add_control_inputs doesn't work with C API
   def testControlFlowStrictness(self):
     """Inlined functions must not execute in a untaken control flow branch."""
 
@@ -607,6 +688,7 @@ class FunctionTest(test.TestCase):
       self.assertAllClose(vals[0], vals[1])
       self.assertAllClose(vals[2], vals[3])
 
+  @test_util.disable_c_api   # Function Declaration doesn't work with C API
   def testDeclare(self):
     foo = function.Declare("Foo", [("x", dtypes.float32)], [("y",
                                                              dtypes.float32)])
@@ -626,6 +708,7 @@ class FunctionTest(test.TestCase):
       expected = rand * rand + 1.0
       self.assertAllClose(expected, y.eval(feed_dict={x: rand}))
 
+  @test_util.disable_c_api   # Function Declaration doesn't work with C API
   def testDeclareUsedInDefun(self):
     foo = function.Declare("Foo", [("x", dtypes.float32)], [("y",
                                                              dtypes.float32)])
@@ -649,6 +732,7 @@ class FunctionTest(test.TestCase):
       expected = rand * rand + 1.0
       self.assertAllClose(expected, y.eval(feed_dict={x: rand}))
 
+  @test_util.disable_c_api   # Function Declaration doesn't work with C API
   def testDeclareTypeMistake(self):
     foo = function.Declare("Foo", [("x", dtypes.float32)], [("y",
                                                              dtypes.float32)])
@@ -861,6 +945,32 @@ class FunctionTest(test.TestCase):
     self.assertEqual(len(f.signature.input_arg), 3)
 
 
+class FunctionTest(FunctionTestMethods, test.TestCase):
+  """Test case that invokes test methods with _USE_C_API=False."""
+
+  def setUp(self):
+    self.prev_use_c_api = ops._USE_C_API
+    ops._USE_C_API = False
+    super(FunctionTest, self).setUp()
+
+  def tearDown(self):
+    ops._USE_C_API = self.prev_use_c_api
+    super(FunctionTest, self).tearDown()
+
+
+class FunctionWithCApiTest(FunctionTestMethods, test.TestCase):
+  """Test case that invokes test methods with _USE_C_API=True."""
+
+  def setUp(self):
+    self.prev_use_c_api = ops._USE_C_API
+    ops._USE_C_API = True
+    super(FunctionWithCApiTest, self).setUp()
+
+  def tearDown(self):
+    ops._USE_C_API = self.prev_use_c_api
+    super(FunctionWithCApiTest, self).tearDown()
+
+
 class FunctionsFromProtos(test.TestCase):
 
   def expectFunctionsEqual(self, func, grad_func=None, new_func=None):
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index ccaa2141b5..659bc394b9 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -2948,6 +2948,14 @@ class Graph(object):
     if self._graph_def_versions.min_consumer < 12:
       self._graph_def_versions.min_consumer = 12
     self._functions[name] = function
+    if self._c_graph:
+      # pylint: disable=protected-access
+      assert function._c_func, (
+          "Cannot add function created without C API support to graph "
+          "created with C API support")
+      with errors.raise_exception_on_not_ok_status() as status:
+        c_api.TF_GraphAddFunction(self._c_graph, function._c_func, status)
+      # pylint: enable=protected-access
 
   @property
   def building_function(self):
-- 
GitLab


From 6523d8303c4df74cca1d914d4d5d4c126292b019 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 21:16:58 -0700
Subject: [PATCH 140/294] Make SummaryEntry a msan-resistant plain-old-data
 (something that can be safely memcpy'd).

Without initializing padded bytes from memory alignment, msan complains when it's used as plain-old-data.

plain-old-data = http://en.cppreference.com/w/cpp/concept/PODType
PiperOrigin-RevId: 167091849
---
 .../quantiles/weighted_quantiles_summary.h    | 24 ++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h
index 314c44fddc..dad3b4e10d 100644
--- a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h
+++ b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h
@@ -15,6 +15,7 @@
 #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_QUANTILES_WEIGHTED_QUANTILES_SUMMARY_H_
 #define THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_QUANTILES_WEIGHTED_QUANTILES_SUMMARY_H_
 
+#include 
 #include 
 
 #include "tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h"
@@ -34,10 +35,27 @@ class WeightedQuantilesSummary {
 
   struct SummaryEntry {
     SummaryEntry(const ValueType& v, const WeightType& w, const WeightType& min,
-                 const WeightType& max)
-        : value(v), weight(w), min_rank(min), max_rank(max) {}
+                 const WeightType& max) {
+      // Explicitely initialize all of memory (including padding from memory
+      // alignment) to allow the struct to be msan-resistant "plain old data".
+      //
+      // POD = http://en.cppreference.com/w/cpp/concept/PODType
+      memset(this, 0, sizeof(*this));
+
+      value = v;
+      weight = w;
+      min_rank = min;
+      max_rank = max;
+    }
+
+    SummaryEntry() {
+      memset(this, 0, sizeof(*this));
 
-    SummaryEntry() : value(0), weight(0), min_rank(0), max_rank(0) {}
+      value = 0;
+      weight = 0;
+      min_rank = 0;
+      max_rank = 0;
+    }
 
     bool operator==(const SummaryEntry& other) const {
       return value == other.value && weight == other.weight &&
-- 
GitLab


From 9514a703d3c670fa48ffec6c856f1459813eb02c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Wed, 30 Aug 2017 21:31:18 -0700
Subject: [PATCH 141/294] Update profiler doc.

PiperOrigin-RevId: 167092704
---
 tensorflow/core/profiler/README.md             |  11 ++++++++++-
 tensorflow/core/profiler/g3doc/profiler_ui.jpg | Bin 0 -> 220483 bytes
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/core/profiler/g3doc/profiler_ui.jpg

diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md
index 5c50a86c88..f0d4dafd3e 100644
--- a/tensorflow/core/profiler/README.md
+++ b/tensorflow/core/profiler/README.md
@@ -56,7 +56,7 @@ with tf.contrib.tfprof.ProfileContext() as pctx:
 
 ```shell
 # Profiling from Python API is not interactive.
-# Dump the profiles to files and profile with interactive command line.
+# Dump the profiles to files and profile with interactive command line or web UI.
 with tf.contrib.tfprof.ProfileContext() as pctx:
   pctx.add_auto_profile_dump('/tmp/profiles', [100])
   train_loop()
@@ -66,7 +66,15 @@ bazel-bin/tensorflow/core/profiler/profiler \
     --run_meta_path=/tmp/profiles/run_meta \
     --op_log_path=/tmp/profiles/tfprof_log \
 tfprof> op -select micros,bytes,occurrence -order_by micros
+
+
+# To be open sourced...
+bazel-bin/third_party/tensorflow/python/profiler/profiler_ui \
+    --graph_path=/tmp/profiles/graph.pbtxt \
+    --run_meta_path=/tmp/profiles/run_meta \
+    --op_log_path=/tmp/profiles/tfprof_log \
 ```
+![ProfilerUI](g3doc/profiler_ui.jpg)
 
 Detail Tutorials
 
@@ -239,5 +247,6 @@ bug fix. `OpLogProto` is a good plus if it is used.
 #### Teams
 
 * Xin Pan (xpan@google.com, github: panyx0718)
+* Chris Antaki
 * Yao Zhang
 * Jon Shlens
diff --git a/tensorflow/core/profiler/g3doc/profiler_ui.jpg b/tensorflow/core/profiler/g3doc/profiler_ui.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..36aa94502a8c3de7915fb0e388c861cd706c3af8
GIT binary patch
literal 220483
zcmex=9G120;#{CCsas8I>5A1R0qH8UG()kY`|EWMu>c1}I=;VrF4wW9Q)H;{Jbx
zVXFWG6C*P-6ALrQ)eH=bwTw*63@n1ILW+itY{G$w>`H|qMvW5}awt1(JSZA;@q>zS
zQc)8pmzcPOq?D?fx`w8fiK&^ng{76Vi>sTvho@I?NN8AiL}XNQN@`kqMrKxVNoiSm
zMP*fUOKV$uM`zch$y26In?7UatVN5LEM2yI#mZHiHgDOwZTpU$yAB;ba`f2o6DLny
zx_ss8wd*%--g@}x@sp>|p1*kc>f@)+U%r0({^RE_kiQrin8CgR5fG1|`Ad+2iIItg
zg_(t&m4$_Yk*S=4k%?K5g;mjzO~^5jJ+V+&$*7S-#A)KfjR!fEje|ajCKX-e5>qjG
zsQMA)HL%Z!^H>vEK7)G<;jdc^Jj{#?OoGgU4E78fi{^{y+jPrhT(Xv74V;wyYgbU9
zUr?FHKjEI3Sv{e*C0(od+wDZo+GLlm_5Q}4w=ckcPvZNR(bIpaH*Zyw3evc|F3^2y
z0UP&U&(}Y$Kl-`7dye_k$H`4CPov|jLav)6e%`XApeMZPN18
zieu$L(@&kB{_o!QyDOiBggZXk&lXn^6l!ebo2z)F*sa1o@YaTfZQh-eG$t;c=El|0
ztaIc81M|xDS2x|c%sMAAx%WV5sHS(?w*L$b*F(O>`kyM6*(^EbQEKz?ppsYaCI1=L
z2EVx0lPY?}^TYaM8XC_W6UaG2kX5G5o>y9tD_v7nV%jDQ+
zVL!`_3l?8zTJUjyyWZQy{h@skOSh#mwA4(D>6_8yc}T9uVeNc@*po{S+zOKYXnuZY
zzd}{`$7RB;nNMp<|8`Cb3e8+H<*K*!<-+FK`=(9W`#5**n);H*JC;S<=gasgXSQd<
z*C$UNyo{}WRotU|{qChp`+DP^)h%CtdSyiPtsT}$2k#tLnZWUH%X{|!3@2_cs=xk4
z{+D64(pDR7ld^vr7oSeOrm=WW>)bmJL)cj+`8&p6|C#68cwz%T^Yq{+%htVJef8Dd
zyZ4rV_?JH~`A54!Wm|giyftgfQ?^8Ft4y-J{%GC&k74mWQkQ>}$%}L=mo2;IooIDC
zIqOUQ&h3`pUeDV7*q*5(|M-6faT||aYk%L^H}m1s{a4K1;h^XG+kJuUhX^uG4}wz-}r_6$GVk6gF9w>`IZP3Aq$i@J}iSFRUQ-!m;e
zX|+sZo#T!^<2mKaUd?0rux{f+?)f4WexJi89rxPNWgG5y_8;pHMr~Wl*HzlDRBh|@T#03#nuEigtt)mv8R5ZmtrvZ?@B^^Rs8(KfUzFzIkGMKFmKH%^iB}QRSM%
zf9Lsa;$jMI7FbSUedc6*>Os0paEY
zyQtfX$z6&r-j&Hs+hcGmkCad_6u>x=y^_?0osH3v2s&
zD~ndTMU>3ibn)Wk>5o&Nru5ri_{08iuJ}Q#^3FY(m8*2Dwrp}={_>aAF}0GOhfmWc
zH|%`Clk%TI_pkhevi=o+0zTZEWNLkSKKJWy`YWR1+{COKn#^;1I1+b878F>1Ti1NB
zUZ_+y{$W}3q95hQuF3IT`;x2^F7|6lCy#ldah`jmq{p3|%K6uCYd_4FvZ2Hy*=Db0v))X8l35g4QvR%b
z_v=MZ4<~%uE~8ZFXH=5+_sY7b@)=A1F1>Zld-}wvlW!~SB|mPF6WzMIiJ$xYs=F$V
zlb$^ein!qQ;KRjLpH@{q?6lRMz5I;h)DutgOiumI3iwtKn7iM6=g)IWi&lpP2L{GY
zJN=nUW6@U2L;EINvoSB%KeGF--uHu6x$7H`w5DF!zbWWqVsnC;>(0-B|;m;W*L_4|Fe&TZv?2BEug_o_SfrS8k8
z3QYgp7<=FBYQSB$a=xioUexh_SQB2)oxZJdefm}Xrn|1?87$=*dR*Hcq$HR<&+zC0?m{IIom#@5-@`~t2kv!ir!b-zx!ZIZc_BeG_t=g)7#=bWA}xfO<%
z*vmREd}*i9C-L-m>9d9DwOL0UT|cEvJom5oPj*lG!k1B1?R_B42d*AjeMdo@G
zZxy*!9zJc>+3H=d?k--u>w0$F`N&LCX(IAYC-{RP%yhGbA
z+<4I3I47O??COh&KYEuPfBaWJ#vrzK(shq$c8Bg~lB^S(74nYVJ#Jgvck|JnSGx=9
zBmOf?f4f)dt9NXGUzPFUq_&s0+ucVyE92I_FW;Wqv^F!0ch#%1GLM6D
z&r(_=zOc^V`L{J}$@U$SEJMH1
z&(OmBPjIgBKf(8J<=^;k`N{nD_xGRi8K3`sd;6ck%%BKt-g|ub`h2b$W&5s_s~)w%K~wBzP26q&r+RuAU%2Fn!%T(E
zpXPl0R9O1LD$C}g->GP`CyUK)?ftF2Z@=<;wGBI#hMqi>XLG6VV^{Z7Q<1l)uJztB
z|6a2$VSdYhh8x$VUjLXJ7OU)cX??iAx!Jd8PuHm>M7m_|EZWWX$y0)*R9!T;j{jD~
ze}+Tub{ij=ZoIr@VyaGg*tcB!TaR{zWy}>XdhkH=;8&ZT;?2n)pRUXM<27A8YJYp~
zR_m|(x9_~Lge}8t`81`{w18Iu>K}PT&ew$7udDIcdUZ>^_{}}P_WrrGW*XbA*TGX$
zr{_q2uk7JCR{lhTUDo)R-^FR^mQi)@4{x0x5xaW%1OH?7AoG
zQ|&_Ds(gR4bxO_ctD1joKV(!acKRJRcjeu?ang}`4gFqIZZ12TW@vk^U{hjY@6sZN
zyW!yn{Y4P0*g$79X=gbE{YI%oXC*OZtLn?@$>w>XPauLN1Tn$4XW8+`zvP4rd0u3LSkm!
z`}9&P@7Mk^-{O~b(_W{&p5I<3V7GMTkLK8Z=MA-Cj}BdavyClm?&JJ>Mn@!Addu~T
zDi5%WeEH!h+Wce5>seo-a`!c>uE<=lExWN;_CT`qF=qWxE(eQ;JC#L#pO^WOeY7lG
zs=h<%YVO+wSM#=VbI-L^uC&>C)BC8BF@xQs2?yo~RhY_m)M-RjwEKRXqML5I`qih*
z*Qr7RFL}&Xoe;e-p?G_`LQ(OhZQaNBm%YucRZiKtI6HIw&NFx4r{8v-7kFyw^NhW>
zu7&4M%KepUKha;F7zsG4amjREt8tI`oY|=T%*9+rPA$JTB-xnrui;7D|qto$Nduw?#~Trf5hLmWwx)>wIkW#
zP3MmYWDDPo@A!VI!{C&N;lYEc?&E{l$OdtLvVRo>pwhye8{zRo%33=0qRm^7lrE
zC9DnSJQt1fZ(U`&P^Rig?!x~JslAC?-5aN@-Dq9et@8f^IWbAqF+wGX8xbyR#n(i>AibTJ0;ehxE(9YY3*V0!b72X*83XU#Qle=bnZ!f
z_%`uT&_?5Iv+Dv&zTZpxc7nt2tMBZx8OPUEt}{OVPft!`-pSywYg;?sPHdmQ)??{*
zL&+Om(Fq46fA$sLTK6*Qd5fH`(yMQ`!!B)&wGEBk(kSV@XzkPq(-M-452)n5oWpf#
zi}sbn2e0^^dcXAdt(QgnZmo`dovdT?;YwktsM?e$&+NSGPxI&I*GiZ7UcJ%zkXPy#
z_uHb^0kaR!;+g(FGnx7J?Ac8YJRuFI4{2-Y&Ux}ht>s5?W~$iI>eWA*xO0`Ry@_Qvyw%%F+!7Z9@NgKjQ0r?6FnC#yPt>QDoMdUrX1_idxbuKTE{M
zXI+TkC(S#2#q;>G*2L+2IJbDo{fzQ$;=kg0zx`*}e#5j|=hGoCj+-(brz8wQEm;!!
z*eq5C9aDW@toKVjb=mo{-}nBOUHYn#`Yya*``+1|ZttT%tGt!CpB=sI*UiHZW6HKm
zz1)9o60g1Q=IvLM_B7pHDjwkTLU3vkhl<66?-D9}nX5l+&0D`_>E>(7vmJJ9m>!fH
z6u~8t)Y)}PO2PATg7#tLU|YR*^sl)&a(RU16<<7w0CPFnRe-W!KatQ_u9>jqG)IEz*&bGQFo6
z+`lF3Tv*K6vVGq^?Yym1rd5&g;AxoOr1F4gH7obt>U}eRZc~@Wy3HLLi@F$wGrsVj
zp`|+HKf{9D=YN0He^~ra@NMwF#_jXJKm5;dXusQkhK+0LpI?&y{UGpfZ}NWzy9xgp
z?lbL|`Ogry(f;!@`)d1z^^YVT{AYNf7GGp?@_#f?jb49o|C?<2
zpI_Sl{Sc{7j`+`Teu@3}1B?Gw9r@32e#YN7_M_&EhRBavfyF>mnEWCWM{)cJ(KlAwH{|tzn{9Eeyzgorr
z4EtD<|Lu6T|E&BUmKXmSI;toBXVBUIL5+Ol>tk9>iHTO)uaE%xKF4)CV6?Ef3dQ8jJQD*=LZv
z*2Fka$@J-~=uRnBjdPI>=UI4)|9y@AcToQG@8bUqef1wr82K{J`u=UQfBxBshSxeD(2CEGT8nt1G9X7F>r#hD~mmW7EEIG)eC
zwPM{`g?-zrt7jzdXmV`t3gua2%V8tM@cHATy>18H<}SVMUs%%i@JWBbRMm`{{rZMK
zd$r!h-(%jhS%Blzyu%f{Be(6`v31k7-@d2Mm)fb9rn>Z>{>hjbz&QPBqs70M=VcfF
zDE6FmPTK9#n_1Vy?#r;<A)KE{|wa=uiszG
zP+T6Tcy-;wD{-7TI?26Hw?(9}F6Z0ub9N&Sv&?@64(6vnw-YA(o!zflb7H`()N8d)9zJUru3ijZA#qPH1`xIL)_<0U;YIB
zdD5I(e(C*_$>o3e7p_=8*W`X*o#jWdrAsS}j@sC!KQ_Bw>Z|lSe8=W_xz?vQby@ms
zjPAVVdx`U*G|%zG&z7}|?Fv8Ke%#;rNBzU|hu>REs;jGW(*w3#Gp)Y;@!?(X_$EWS
zr7B4h4>lc{Z+)V^>W!#V|I}_q&n=T`zs6ICl+_ZzopvW$Pe}(81IlUh@ALp_k
z+h>$r@rC(eJkRT2YLivBY@59G?UnLFS5p!-W~k&!$Gnr@op2!6y6)q4vE?VP|Kp0E
z?V_nJm;Ay?`+0Bnx3;Ta;(Mghx>^6tTvt)``-uF9{*(F156`+vopY9+vu{PjEt{^F
zTNiMxF{(T@F($mBKC8YXd{$q3<)aK6Q?u4e-nnN!ytJx46Ye!V%zV3s;Jk;|HLj`1
zPkArYlXLTP`k9@!i*Ns`U-e1v{LF)STYqN%^FQ<#ws_pHBbQugU?EWg&9T^>g~1srFb6KRs#*S1zwT=qZ1+T33POO|d3
zjEnrO|DWL`+lT(Y1(W|XxO;~GGp%^{{=hG}StWO~{V#sC{(Ui@Vb4W<)0OH>=4BB%
zZ)Me{9*CJEqt&Lq)VWModtRq&&GU!(x7d&F7ql^dz^yBE+vAc=@FVZNQTHDGX9(G_
zGuu1m^0JDhrCTx;XV>%ytnpVeV#_@;<>2cY*T?>H`+N7;KhPC>eRx`&(qz}^kypNU
z|FwGb=)x**86^wdX^+fLZ=U4sRmNaZl=*dg{YA5XPiE{gzg@rY{=D|}7vqh8-2c}9
zaas4{{|tf|yUdzyEvkrIldyNgrVsm8ukc>&t7CXp>tHE^!7WzL@ccP48_$*T7{1

)*9C;l;}{r&ZM+Tl`R4diKfOwU^X)PJcVy`XTiQ`!{*4`@oRwz*V{W^Tr_U6(7`(_VBA;9*S%W{&}8v z%_OB5&w@NA7e>duHOo$fDNa|=xf?xu>Fl4{^|Ss>-fEgz^mSLfdHtvQ%i-U4X+O_z zlb8CF_>uX@uKx@z$CfVjT&dT|w|Uv?^$&T2eYORB+LF_D>s4uW>w%z6N3~Yoy^uVo zzAav8PyIvxo8bqno~>MyS*P)lzeCvV@|M%HS8I3tnW=ZrV$bc9QqSCWXzwr7ek!Ic zlXqgt>_0-Eb9dZ+?_2hHqj}WN{3mOc*6+N(^DD3D=W|;>UOu)@=11k@{|si=ov!@J zd}uH5@|VIaiL~vP-?v`AxykeC%-BbEvCp`E_P6;wZM+?K^`UgWP*mn7^)6Fg{g5j> zOP1Vno3A>>$%aSMQ+>uX_VTkkm<&I4iLH8_YcDeY$pz^t;Vm_JKOR2PXaD24;X`|y zZjkl1ODfmW_s%{XHEqW_oBMj&rCrB)CDZtP))g$^(VpBm^^yH8?W4QGc7FKnW+gXw z$*Xt)9o>}6x?c-Eo?aZwyS7~ROU{dTOgjWyIHFuX-DYiEDqnZ))tkMg@w4hLhZlX% zuj_h$+5M-Aa1crz`U$HXZX!xwG}&58-3`jDNI0=B@g4E|=Z^Ltg*dm=|jkZKqs%Wv<3Q z&9miEYM^qe&HQw`<3H&|-F%zCx{)8+U-Gi(3(9oc*B@{6~hulxU*`uAS@>Wgur z5BB%oQ~6jU&-3Whmf7C>MQ>;G+ssd&c!^WjPf255hH8=9gJ|^y+Md5n{XcSte%Rji z{3Cz2$=cM(@z-_U-2ScfFpe|%ih8fN@jcI7jN7?6ZiPCnpHVnz+S>Kn{~Eq{elPnY z{g=ON(Y}%o&yU(BANRNQHo%hbtojONPA4{Eh#d!a$ z*UyAJ6F5#BF4gbdFJRL7zD8f+!`-&bXUm~GcZ`>GaI*5O z;k$U^?x(HqO{RxUKYRN>!=(876}I|ybMBYq*Pop4b}@$k;C$(IpTh%Z0_Ma&TE{t>!Ry#U)%gApk#J^`mFy9-`>9eDR?KS)6T%4dGxYD( zbA9&D>;AK*xL3Ykw?97npW(#j*Ie4q&Psoq@G-r^PW61|@)yFf6?yW)ryuUrS@7Z5 zN|nX~NBS#mm9Cv44cu}Ds?#@Xyy z?-D0BH%|8Y_iB2{{CkuCx=nrZ_}TS4Z^}%1FCi;DLs{a{Uc6)xKc`hK7GH8Ey8ks410Lu`c`e^8XCk^}A=;*Uc8XJN16{pS|z@ zxz79SS}4!Hr?C1X_rcGvu7AAkYL)eCY17(O8=Pgge);xsUc}s^N?wKV9uV-+U+C3VimZzACql^I!B$ zdC!Y+{vRG6`)Bf_cYW5bGsYQaa@-rQ>3=D`EO#o?RI={0&IFTV4}6q;FCFvfKWVSV z_h(KRd&57oiuuRI_H!>3d?{vrPyfTUrK`>xZk^`w_FaP6X|XGx8x5a5?AFYg!ys98 zDQaons{agC`JdvSnJ<|W?`!uw{l~obt`*^L&f6c+O@6p-eet>H|(Bl^s~I6Q~8jdvVy>?^*bKtX8-ul5ODR&`5t@D2RmYg{xkUO z+p}x?kzFN=KHYknqnDETVD|Z2t4{4UGs!79`}vc>w8d4{mGQrR_eU+-=l{|Ct>#C0 zmyg#rvKah`{bQY6U$sbl!w&P@{OsD)oY&8|8)n~$d&j$(E9q%;{psBg-rsUQx=;T{ zw}{y;p6haA(~pE#aYs2n>qLYhd zH_h3=IxmtV;-cf7GgVr1m>g9ii)DO?r%Pr5YwY}J@ zJB@q73ci=B9Ji;I2^3F^uIfLwxBSpN`K!-A`Zq4WymfW{9sf0x_o6?p<@!hL-o16-rhOYXL`T;eWv53f zWaQp_a{j%5@%ys-lPn|-uBwsw$lkSwcV$MM(#P7hPw(DaxcuJIqir42rhU*|d&OK@ zOy6|QbVGs2r51N5Fi3l_>wXmecJRZ0h6bDLwHcSxChE>>E@WM|;(OE=vF*Q~MBJGj zZN|DzR47!ZTwQT$xv0{vzub?$?Ogw3`tfVq{_%e{iRZj?Hs$)oR~dQw>x6DexAtj7 ziiRsCEZNh*ynXWIZPM?XejQ!j`}J({;X2)Jr^}|lSLAQ?=G@Zp?&;@C7LU@mO})d> zeOx5#;3db26Btd4;+xFA&y)DEeBs?!pJ(UG-ix_&T6arb$h2(JDPl`S-dUdzcz2RP z_Qr8Z1-bhC-+r&^bS~IbF3;E|Cl@dF;uhyM-Tj+wyyX^WbxsQtIxWL~dYJ&StQ zc=i5JZF}BcXZ&MZJZFv7W%sr>U;i_7$SGd*TWh=P)&BI&UAIg-Opi?dJ74QuTD#yH z7q+s1vUQKs56@>P%_!aLAM{bbNAX(k`?lGKnQr@EyzJk`!JWs~rBY+AJE6$Pjj6G# zbpC<+f|*wCO?f8wrJ|!N_8*qY*`@XB{G#_;_iQYdf46ko=Yw}_IwGu2*`+4;m|b9u zioc~Fbmh$-N%8g?>AwrtS2TSOysUn?l-DTA! zepS~e|44oyBPX%{gYA>5JJ-wY9O|Pfqu4o#nFG**-rw|IU3F&zW)m_P+FQ z>|J>_-p~K`%hk^=E;XOHz5aCn*4+D(=gz!k+z|hV<#*ZAHL?F0^4|Qr@NsT?_qN)8 zyZD=nw)MaHdH>w|&FS?L_rLpDGL+a$m97mwGOH`&9>9h6D?9DE`e3homT-H- z(@EBjlKMP;KgvIgHn=N##LRhVpV56Xdj-`uiwPkW%y-7N903u^-1 z*{3nJ8?)XyXK+J8VVYFge+IU%ca@L)>Y1hAyY!Nc_R81H(R(f_Z3r$(*IBPqcsKv2 z;?(PU`=ZUyJ&bjH6j{e|E55@1_&vG3SJNtLLRX(%z4quHCrh1_e2wFqRhv!nxpt^9 z__G#XVX~EfY_~FV#p_@G-G5YmiEUk$ad!RT%%%5vJ@a!zHtp!$v*Y}%HKJak$70Pk zGatFnW@qq9UUWhB$}im3vR2U^>vc_KwIln3wiWz5V`Q{Q;&~jmnndwo8L70^57kjt z$F{Don13uN>z>Nqi|5)iS7e$o`^DPbne^tPVr8*m?V6Zv#!?ImnD`CF5AsX?2(^iQ z6svvYSLyfuuey>Y-O{IR<#eq?YoDyWdrEwYztOb2I~oimop`S>zOom#sm}j!`C$33 zzektcYIgc?`}WP~2`W2x%nE6fm@<#;rHbpx2PyNe9{SP#xV&?Z=!5k_ukI;*V7~ck zmd!LJ?Py;g-^{x0#%r{n#+rD3UZ`GV#8b29;o)0dH;bx7MpR(ut2X_IN(j$SHl#9~qTZcrz+%*`s^k zu02X?jZHIH7#8l%v&_z5hl)(!uk*+EXaCr^xqA9MTrR6dq>+i%@^UMO?JOT+kx%tM!S?e2+JDhVD*nOt1C z$#`Sh&kYP*to}=CEIyuZu?bxN<5q0C&&T_nUwQRTd-WZPE?#u&xQwvBVxrRDg>9~f zWkmBFpD-~sRP&0qK6w3~LGYDT%BG6>ZkNAUe>4o)e!0|qfA8X3JBpPZIe8}?&bxeo zUEM-@3j=6)5Z~~hp-t+;yW&UX!r5G6rOW) zE%;U5W+zgUb?J|D`QrG@I;|~NzG~!s<^Ibal^JDyL%GjliRXjqB|?kbCD_cI4CgJG z?Eh`U<#k`}8-Hynh&o@NF8gkN;mTXlZkIgm=F7g}uV@qzdT?OVQ}=t45|&2`|1*fy z=~TEMna6(7l%M^_p-c8`_8o85)C51`=ehq=>D=kl#qGOWni=jXxEGvC4!6|2qMH7F z>GvJ4{;bcRQ`5d}pYPw3FZTRrIDh8L#}Cp6IUk-icj@-txozv^bx-$g+qvz^yN7$d zJIy&$%`-LT&U1JtJIg7?@b0X&{~35*KakjCx-cVbmhZjk$qPBB`%d?|`aWmf+DYN} zwRF!I>)e*zsS&gC(36_Qa)J+IgM&V9y?*$wY)$wh?e%PN6_=wnU*4*noAyleYpj+w z=acr*TNcwYj;uPEde58V;&z!Q*XKWa-uX}RkJ<KkUVb4TDO4M**tlja%D^Rzv=F6(QZz>nr*`xHO) z{JPg%`si)YxATElzKC2qe*0c^YU|s(3nI&xU2PLP5Wo>L!O`;L{=RwYAN*VQ$S$lA z`Iz5R!mTg*`WB1XuKQg!kt+_1OpMSAi|#iop{zBO;{ z$LNDqehbrA{Bhj*qkYl67gnWvR_p%KihAk$?I^R(M4iG@QNgW;Hk`Lu-M=^N`o8s{ z*DiHkzV!Wb_R@9fYKvLk=9<--PzucYTw|@0yAxqsvyc zE0=!0FZM#_#NiV?B0)Cyj&Jx;`7!Bz*x!XU8oPA=o)O#i_T9$Z6)&aQd)96B-uh|L z%*WzV#mAq#JR_$N;`Xp>5{J#ybG%lD(@U$X+>9j0nm-&rSkIn(rSz|rt+UU@&6}sr<1~%FJ=@y4@b__nD4X>n+Mm*MH1-;>XT2?{ zjk+9nDQBxSXU%Wv8$!=G%ckC!UXhw3z;Qz1K|}V_{B}E~i#3iP>W_v?&F?L>4Suv{ zpV*c7Bl~3cU3)&^hX012t4f~L>s<}}=b|Z<7gzN2Y(dyQ>yPg3_383bw|^{;zw$+2 zCi;bx>$5o}-Y=)!vX$-K<*wVsBX8>MG1-vy^u%+=dR^C6UjD6VdOI%r=GR;Q=A2*f zWX!r{AXW;y2clv?9 zWY(_vttM~n?r!lHR9kQQW!m?(+_ftec2B%}Ls4Lx`GmfAdlFW>*=Lw37SE&eM>xIy z(R;xc_t>tQ9{bPGGCOwfN7F}pymX!!td29;y;f;bNsqkdVxNa@cb|yr+v^|y^=FOB zyDgvph=2WY=S$&l+aFx^9GUy1n%(A|_qnXEc10)dbnx!&ySMC^dH3lg?G;aj7bfe> zy!BYIc82_wBm1%QE+=&42jz%l7g(^-F4djE{c1?b~zF-G*n=rk^>U zw%)vZKBzoZ*}eUTn}6K-yW5xF5#N%kcYn>KvUls=*X=jH^HslnKA(;2BYys@jemE( zH_eW?cJ8y&)%=IsuIN7OuSt8jcB!CWBmb%NI}$Q_#a){YADCzLo;O4FkholCY2vrl-+#%y~mk#;9qWzKcS^bg+; zU$c|HR$}qx#BI-OX}jnByOf?8^zdEetjA@KDvI7tmEn27pna%LE~8H3L;G9fk7ie1 z&uWp=-d!DiSKi~wpTMg7w*HGy2G#8QKL7FkldqSb`DJ`?KI zw^^^fX371oV{y&H#aiWme%?J>9ugFlV|z~S_k;Z{@BhTu_*~o7x^2y+qc+D*N9@_w z^va__aPQGQL2H6HwFd4ANp59cAgU72{zp6iab-!M4qP0iNiWt}WIv) z`>AE_I@imM#iuqkEbw?^uppTsT=ut7#r8w<1-{Og{o{Pe#`)pv-FI^$fBw>IU2*&F zw(03y9=iLkKH1Dz@Nh}m8-@_pJ)8V4&3kXJQtb-l`!rPrQc4So}R-nixx=iO5^XDql1UugRujpP4t|LA<)D94q| zD>HLdtXk*Zn_H{9d-3|SA{XvByn0djaAKRI&4cU5taP^B(r>ks&(3@lxU3@V{Ly*J ziI2|*P_`EI;q8L#IfCXGp&-2PI68_g|l)g{j5{Hj?a{XzWU*RSupbZaaZtv$B0 zI`iRL=Of?a#HWXBsop;6gkv$oXY=FSYNw0Lc1(3VcCl64Rqyolr;9SncfDNn*?Xz0 z*_-V(_ibbEFW$CmZ`}L!SK_pPa6Z(Ry7yvw)#e{v(Q`#ZB0TdStqrS6Z&}#s4N2Tm(?fpnKFH1Tb$wsrwxiZ1R`;^$+heKXoJ{_cR2~=II$&g#bK+py z`7`z6`xGk{eLvdyyrNCN{abKFxc{CH8$a6jr?E~ciCla4tZ`#F zByU_=<@=wZv*e|n)vGXh!9Urx54%@aY&rkK^~-JFyvIIUuS8xp-cz`E!JcBfCx$yF zXRWGfF7xZs_58j&zHi@p-|#P+cV*hE{AXaz_wV+fH#`2?ja%|k3K%V|B^n#D!}d&mIBW0r5TL{~3g8 zv&27riT`l_`Xt@2we{;?U!DF>;QYhibMqgWEnfIzv+GBbsJ1qC z@9k?!<=>`VE0wK#bnTJ#wNrl=er2EG`^+Gz^;XQLxyzNi9@MAn)?Kp~_!IZB@}q2j z#~$^6&DuZuy|xNQ-1BE$EWL7($LS_9`_0FWMsB(&ctt?=x9g92r(-Vt6ZmoM@R6`8 z^A_pANk40RS435A3*)t$U9c^@__(5jfK3ybP;bzWjw%`(ghkd%lv(nd@H6llqYI*=Ae$_FKEUGned`xJ*@jmh+z7 z)*F8I6(1L6@2Qdf@cNN_&zc2SwyOGzam!wGUHqf%vEROVC$<$lx|VS-!#t(o_hgk{ zvwr$J&9djZU1R%^Z|mBKAKnK(`#;)tX{&GA0oyr&Foh_WsT#Uvyy26 z3A~>#+}duldwpp2+G<;S*=Y~fuZpX;w!J?6kKOWXR?830XV3Osz4zUOJhiL53wO?4 zw@j!x%W>PMCH%YOb=#hPI`y#I{9c->+7IK0o_mt+X+%f=jt%p?^s038)%Ut@^U@rc zPMfYNT4QX%wYa3?IG5#vpdaCn()#O{{E(7m@zzewhzSBDM<25b+`X-%D85X!wX zv@wUlmObn3s#%58XZ_pn6dcq)*=GKsU*U&xogW=rUHb5Bz5kAnOZS{FUYYUBzf&dE zv+HNR$*-E?pK)n?mz9t1w*4GfWBx(?khH#BT+9!%Z-;&#xy5uR@@#tcBhGxkN!x|A zPxIgGIk)Z9c?oMK4b}?gS)XgwAE_U{-~FFqi*9d!yN&O|T(j2)gI|Sxvnmn4_VHEK zvgIoNvgd9W+)HRuNYU~7_wm@nPsb#GAp0pnLxrfnxTC@SOxpClr}J)AMZSHSx_)xhmr`4q_4j}6pSP>> z&aW>0)_DRSTH8C{B~1^w@a1jzxm{O$s{X#+yN7k=l35uIb5vLAoO$!%XUZf`)002! zANC)-xlbhPO0HyB4aR?vPEp^HwEijck!$h~WLq z`vQAZAFbs-GJWE!X~i#osb0M{clFr{o`7gihW`v6o;(eDS7oE$J>IANxT7X|!H?hH zVn50*Z?y?t93T1eKZ9WE-fPkR)1<#_x-1_*$+1vJ%fqlT`K)x?F&FizY>R)d&USw? z+tc1R|F+Y-?wQ}qzi!TdUYZ|PEBP>*zo{%v{DS_MdV!4cT~@D;{W=_8FC=|_>#JpE z9h+lq-dr~pDT`hAuCUK8u# z7;|G|yve~Oui_Or6{_GNqSKV8}Ee~ zoS%-}6luG=`VoIec)ZYmhLrq{pgVWnS7yDl-@ImBPTr!OTc>>sZMd~`&Bmj0PQE6Y zP24ISuVS~ovszpBX8y@f|MXoyMP;suG+Tdl>u2Bp472Oa@7{hQ%)ZuJ;0OMt%K<)2|s{cf%M&(Qz#)_n!vE$eT8_|MS($B*kj!-Md~uh!iw-<9~AMK9Zt zIpgj*^O?`uGW+?Dz0_TpXO(l{fQsare>^{e{XVwrQ`@|C|K=;HJF-tIY`^+mdE=xq ziOgANQ^npYzKS`%W1i=E)7uxFE?s-M=#%rkf7Pv^8ML&D{v*H4o3~W=M#o=WQ(~)i zhuQSo(xPb}+$T5gywfT(!P0bjO!?8f#fSIF{3u-a%1(3tWs|dV(rNcgSD%#$d#Na$ zeA>gQO)o^hW@2Bq*5;Czr?MBW)u{GdQLgtoUTBZ8rX z`tttf`i=)b;(uFLNK~AAS~Al#*Du=E&VG}()W+R=XRdh^@gd68a8oXS?_!3>qE{`7 zuFMhJRu*r&FMr`_&Ye!ewW0T4e~R@_y8fs3VLw;athkwLfAo7^{Hhz~ukF8i|GqBH z_3{e2owAJ+K3N1*T|FCpWR_c_-CVZit0FTuXPGlyI%U1PrE}eKg$-Q_NAinA`>vim zU{e?`{qO4GH}kB1EX`bzRrbw%p5lkzxzie#--+aWe(ec&O?P_Q(Z}s~*Gt#>GBj<}=$$eCfTspmfo@v0e z*Pm1Sik81WCHP)t{cG*aXS>$hZZVy|W65vV{kt?@zy6h9D}E~^{7_Zi`bXON9h|HA zC4MY&`*+{@n`PUzd8H#&E7rQ$wf`J`VZNACh^~r4!fZC(dftX zfW5PWXRa?9hX~Cv)pxpME0z zT~*^hgSPkkfBWTb%@4T7yS;A<_v+fRr==dsTP{4W&UJfLl+<+3#kHKxozWw-a7UAa zB})^B^wmf68P=Pe6@Rd8TaABVdZygmwXfS_=LeN|Bqy$j@%ZF*{!^>Qsh@df6RJYC z?_d6E>zDrw4c57@ra$=4AZu@YX|0{&kFfs?hj<^Iju+1OZYLITD|nrAy>KK;nAX|n zZ8sYnS)Mm>tkA2|{GoUH;Khn!eN>ycA~TBrh^7A4ojS|=KIvLr+*-X-VaJYM=?+JOv!4@0d~e<|GG3!L z>(Fnnk2(LeFPpq)^?d9bRqZc%=f=WIV$Xc_BwYV9Oi2-36Ld%QgWLV>C5L&+d9KvH z%YK>npW)m4=VzBMn||$Z*!JlAAD;bZcv5=b{j&Gpx5>4+D{nehrWcCdU3w&2$N0d* z#}g$|BO?kNSp-~DD*BJ#llV~jY-Z;AAN`&e-hH*K>EG6Z`J0kXXf6|KNhq7%iTZQqt~zg8ZTdw?DSD=?!&*{N2>hhug`Ke zvx>d?B``LY=dq#78OdMryLh86?Fn7w58RKY&b?B8&u!*fUyH4qqfhia6Rz5{ck61;+|JZ#-peMxIb!_Q z{dU1Zr)zVz&FXz$cJ)8Qy1m!WpE&9D<>%YBY`>tR3>bksSo&2o|ulq-Xe>^`N@FVHr zpB1GmO{+^cP5t>ZEa^ArgAe@4QS(2{{gHkC;==z7E$v>Pw=OZtocMQ}?)BFV!eMFkxL_P1wju zN0_`iWUyQ0&hxTh>Ehf|e8xQ?Qy?cS@m${j#A5%~_v-&zrtXcfD*w|i{%`M-{|px< zZC}CM|6}Dxt;-MHTaN86U;4`G-tXJKcD(jp0<$}x@0_d98Gde(#j!Zolj#9$zJVXM zA3M+cqxtZycdJ)=syF{<*r3O|Y-{GD@UCmVX1-kyd>-D?T{B5pd-W!P!#tI1=HK!^ zJnMa@oowXshj+#51zv?&{p(*PYjrI{aazXOtsLr8l>Jo--`$i>FkYeFbG^S}_k-pA zd$LUDOK+Q9z2MdMeR`L-Uiol!>fM&@uhTQrawp`^@XW|F-#$mQTj54?Mg9yQ>8$Yg z`O|ljy@AOYG=;Yvaos{o8*W4TjZEDAAY-1I#>MYr5}?XAKqqOz?ryh)}wd+n_X4< zCTu=3-OI8x_~`_GpZmA^vyc7jyZ&|Sy?wFL7i*{IzSXW;mtFfUs`6XZFV*jF*&o`< zKN6{vyK1^t+WT_emTudi!%=&#F6@$iwqsVt<2KzJd>-uuTF1-Yu}j+C|D$SU{>}H_ z%D?Lq|ISQEi#~mS%ks%Ni7GxSH4n{16K*Q!U7nM)NB`0GLu;!a@vZ$R9oBx>e(!ZV zskwZIv`^;ldAc`!S8K%-)_P+fF-Ui4d`+;hHy;alQnzkYt^lK+#f)mgJI2YTMzwl%8u z&c?la-rbw$f8kfY_mBRi+4BYTYYdmYs#05M+S_$E_GtF%qpELrZ|3b*UDb4|HNs%R z#49|-v$ih#ZEDAF23E$k+Nqyv!p&p(knLi0IROgLHD%c zZ)NGG*2%rz|Dji{twjHAW9hP=jAt*Md|p|f{dv#Y#s4bb|7S?r`JaKKg8kU7@FTL> z-WN;WRv-GuzE^6`b^F`pQn_dMl$Yzps$ZVPmc8_@@=0aggItV<8nX0$icGq8sov!7 z^r=_ZE}vc&y7$@a*YZ1;)<@fae$$`W{OEd9-lLB^)As3I)USUi+xzsI(D{I_^2=8l z_0mImOw9QGCQv6T0-?jUQUS4{f_O_3N>&lAd!D7aHi!$u3nA z>rG*MpebQ_+R*gHKZDCQ^&it8^sRsB{hxv1?8CQd*X|wt7WXP_Qsk>kjy6H3S#(AE z4rz+1`Ce8@7vfoG9Az@O>i$2u@cJUZ+plv=QkLD_@)~qR_~)vh@y^$D{^|UX5BRX- zqwe~iJ@&;9FB@$CcfCEN@vY4z2hQj0H+QS>q+6PnyMO%NX5*T_EG9Ef`I4!HsoO*y z)y@05LU(5zG~(#m$QPn5#yuzD`O=#2^&gfW-DiIF>wUqP3OipW-uR>Yq^__v)|O^` zKF->+ad!90nw4>~NBBfzXROh@^H$1E@kjJyTl2&JjIM2w<<}xs96YRcc+l@arZoRcD+Oy3+AWQh}*6sfpGQa-``pfX22xq18N@Bdz-9sfRl z>u>#zdn%XK*u*}(FY&U9z2i@L^&;n$FQ3_Snl8C;J1@ia#15m=^6y0#1_(yiIrp~o znb`RLcCAo9e4jh#PwwBn%XGJ`Ub1iHqMNV8SEiaR@2I)jr8@bUp4-KSB~4Xj>n%Tu zx9(H;!T#WW(;m);@3~*wDg3+8<@PaesCNot zw|mZa)^lxr`Q>h&Ki?mtdB6Rx*U8o&Qcr&PGjEM@tyqY$XjjtZpLNlk$*c=23ld~2 zOie`=bGqiO&o19G`=@!zzVesV%TH^%?w$9q&olek)Z1}hcXMwY{vB80>fZBk_vNn{ zru>|lVN%-`=1p`vRIzShpM&m<9iMIXo0--gG>EKqt*ZDj{UO`O&JPnlhIQ10h*w@{ zTg-oZ+uOVImCOF*KDT&zwx<1~@wfCJ?W=zH9~0jt);6hP(xY0lYxi!hPw?M%!)#`1 zZpG~Gx3QC>*EASPnmv}};e7X}_JMp?oc52I%RPRTExP44|8X{N7-PyU7pw4ZHGVT^ z&TQOQJz@7@o>dbknDel`oYcK)vFz;4wdPTOrtB-LS~q7y(Knk<_pWcBJ8!<(zVPUI zJ0J4jQh)fquTCL*9?vD4;H9w@&0aGTvLn87msd~wwCDJzz!aT-5vTT_GTXGlRy+Lg ze$f(nf%-$?<$OCI{w%TQKl{{w<<0DXJ1W*m{N}gH9z;*EZVXF4^v>cOyY0hV?IUvH zKb}7BeD>k#(|e7N!Zz>PeqDE+?w;1k&JVRudvBliu5yyZBfiTjUgaJ;@4Rj(j+0$lcV1p# zkMD=skMt%!QkWK}WR<$UWsBsjSDJRWmql8Z%)G%KV(xmwpwxn~TvH}AU#v#-V|=HL z_#v6|L|_U!=+Mr(PfLag@@lNTlpxvIyiMx z-;2MlI&x2L+NT3? z{;aoSIPwHm_FX^cZ((*JIdJdCCI-{})4KmNt$|rcXsLh{|tHCpX@4)3;(XU z&nV;hv3YWr)~avZd*N#>*MEk%cmd~{-kb@yJ#*SwnAWV(^UlA=Ao%^9+noOl;x^p5 zD{o|8uDE>oVwO~sv+CT%uQqVzg~fk$Gtr;U|3`&+_qOcA|IGjJeRMzeUa-PAY~$r+ zAC^?*b*?z|O}seTGw=JdSEsV8tzSG&7dosz;hCdk>d(55kISVW%UU0k7yMKBG23zT zk7}K&=Up|*3nOBDzJIGOk-Yu3GIRc!(@RpO2Qy6UR`}^w#JXwU_xd?&*WFfqxwu|P z{=B#T-|fr(Mt{D&*lxu~@uU1~Kb!;m{jcwdUYIfclSEV0E2}AHpSI74aBnSRd9U%X z;>qG<3sc+uNA$Vw{@7N}b?{NFT)yx@`Pq7%vVVN5r4nS%a^yZpXr5$!U2(x5``z3A zE~p8AwEL`;rB2=-)1@!Nc6ColFYl{9zN26A&SD`4KL>VYmbVt4`T1)!KK8Ef{vs<@ z&po@^SN?cZf4Az#>9^kYgqK}sdpS+jQE~n`rH6_I7hAX`4{v9hQaY*o^}o!zxbNkg zVqMj)M0;n~zcl~z_rC7^=V?#>5&7}{Ke4*@{6}mb%sXw=A9~L<$=XoSd_?$VXSNCp8%syN>Qtka?dQ4} zf+z7Yepr9=^r3rdAH&=0B+iPrN;_ZtW47zfr)=HQyyPqYE`4^J=+f~lz}sSyzo`IQ zXF+_EZTPo>ANIu`ydT_ek9&9f$o6}hTQ4YV-+STdN0I9rIWL*&i|z@&GdrU1=`Aji z0E;_fQ_6C`#rK>0{?dOrRrk&A{|wV(cmHSbc=P6;YV4264}1SJbp6q1Z~x=+@nliX zwd?Uh5qF(M&dvUJoyRCQsdL8yw_87*Ds&^OmcQkHxSsP5>!mu)id7LMx@NyUn}fyt z%f)BB*I`P`<@QJxI@x=Gf6BH?1=`{d%qBld`M}>*!Y@!^ANpbH*=sA;#2qwSTAXZB zbiA^<*d(#;Y_TTKO07H|k(38wuOrLkI6mfg{AWnM>;CZV^|rhcd6B4!PHsH@QDJe$*?kwY|1+pPi`&hs@?7-NC;#^`>n~mZ8~G~v z(SL?U|K|2?|6UuLePa4g|1;!nTg|YEe@^b_?QibRGn}`zJmg3F!&rWoP1j2Gg)@HD zO|06zwv>DIk44*e$VGe3c#_6-IBD|-|5Ho8ukfTqC@9+r@o6QEI&_PNy&UvFvlq z6@}~1H%)qW#yh|6)EnJzhbP(|sSXoxN=a^2Te&uVRs4sV{|rk~=I@$)@9dweU;gMm zub%gLVfL-`E|c!qaJMVDrn!1AZBAF;mb{js7XMy(rPkp)Qu}>FLL@@DjQkjmt55b@ z_*49qX!vif)4zj%d%r3B&v47m|K`7&Pxa?M&Aa>b{jsv=bJ8WBtqt^GTJd_;?2KKu zPk%++&C1Ue_R@J17+2XLd84s|wcl|P`;OID{Ui3x_PMm~-PquQnlgQbJY*ik5t=t%W-Y}sMnq#Y)#^acOCC4qesb;)ex5sFDRL$FEF%l8N`<`Vinw(hf zd+O$M1u50q>(d{9|IcvXbwb};`}fgLx4(RS;o?X8V;3K0{4vhfZ~Eh1{V2DJ^IhVH zt1~uL-%x5$ocXl4NcVuhk>RH^N2W76%RG9!{P_I#&lT~9rT#we-qI^{_{av;V^d6L z-&nI`7Y`%XN^MOYbB2(V=A$)#%Rks3H>l8mxOLv{1+VuhU-~kmw6JjDkK`kn=__xT z9yxI1n10+NW|e^3(;Ba)F?IxRb7ctK>;16)$b7yZ`Ul=Gd=WLP#$&^WBUL)H=5^am z{oAuj#$fBdrUxqx)`@Jsb$rL4+l2>KhvnYi|L}eNq-*c1@4ghOs@Wg;O=5+6HoG)yE%~$K94C8x$dB%a zzWhh7$;n))@n4+zPjNrP&6nm;ErGFjnmpJnW;X9v_`<%l=J`kM2l-uF=Xc(-oK|5I zzN(`781M3plMjna=>$e|DD~Rxy!FetP$E)rsoVCn{|pfxp^4MJ`154gDSgmC+AsC< z9?y=Cax)+8O}ib`ex&TWS)x3f1;_sU1ynL$C@N@p5;$Ys&;Lm@q&5Pb% zJ9mD;m0$Nu#NWAla%p6o z%XY(qd`cVIG`IiER_K#h!m{n^BsZ0>tNt@E)XDzXd{|ELN8{ssfy3*(SH7Nb^LA9k zk4e8y#X7lc^(@=6>m;Ek1o9}z2bobw=zb0p`y)I2ME-WuN@lMcZle<*I#1lv7UUdjQ?-+A# z;m6enZ}lIF-Y=K6EB1Co#`PkVS)oto6vaEPh(4G2Nyzr7&!f~^OJkZm6gL#dd_R`Y zUvcjc`Yl+Q4E&?Ziua17G;#zN)GTnc4K68!VNAttqc3jwM z-5LL8*S4wEMJqPkxEmXBRz*{3%}%4)_4X6`y6St(v~M|{IPoO>wEu6LAKm{%<~O8X z)ve!RQ@JSPn^f!-E7iSow|tD&yE;!}UV6&ZBPk0bmt~hL&OR;4wAcG^o%vN8nfu%| z!mD3+{Q9>w(QuDc`E(hnoU#j=5uJo=|e9ce!p;TkG{$M%|Ck!CjZYSiJhfe}-c-O#S5#KjaNRbn8Du zYpM0=%6D5{nzn8=%o21up_Y%9_m$;+b!6);Mhcxcvx!YrRm3w=Ney|LY*td$SjvtSsj}6V;-o zAXLP(p~Esgb`n!~o`=E}uUB)gA5HtuAYDIg+P=5i;v#dsy&vw1Z~tSrYwgyZ)6L&n zFO8@uKeGMT^cI`IC9ihzlo|U!e7$4MzuP9|lYWMs7xbRi&|YwuS8M8x+f5(m+U7q# zQz!D_>bljBG@tfhaC@cqmFyC{04@QDzEDP5k2n)A4=!w@GQ&gj0tV`dY%znC;{Kcx?V}(~AELhs}Sn zUjKG(zu=`ek7sFT>Mfo1j^pjK9dqAuYrAZ+=~JCB(`E^q^Tb)-@+B*-+KIi~$9s9< z<#nd3=SGKaxsdy?_0WzxYvPjUt(GbDRIb)sm8o{=-K4Lzc2TvDd_OJwZT2s=uIlgc zQdg0XDI$Tc45DF|y*FKcr?0+sbJMNrlP=edoP`$qE4Vtmb2x2wCG_NTE_Mg4t0D~H zq95xY_3l4-pE>@=vd>P}On*zWRIXciQMvV5mxx>W)IB$oV^^&z{+zDDz?Z#sz24vD zI;L;SAH?^R$n#cY{bx98b#CXfn3n}n5&qeny~|p!cq_;F3bnBGyj9&P5qwT5cg0<) zV9&FfIoW?gejHs`BmX12`f+Za^~o)-D_BlvCJUV`E%18BDJ90R?ZT0#4U7?!7}R$2 z>Id|PUDnlK7#6WE(Q{%9!@)>L$>Vj|&))aibJ;2VN&1nutF0^dkMWAb4|CnU{n;yK z`*eLP+cit9H%srLK>^sP?hF)kgb)uTibk{Uha{ByzT{ zKCKyaOq%0}PE?`DB##)j5Z|shet&NsFx@)!$(~Dby<2~&SHGCI&GPbe9qBi>zk2C^ z&AJ

8W>r&m{As-1k{C=W%>g+o?DAoAZinG5_UDr_Q>=6gdCfuD&$J(9TbfijOnQ z)0%t!t>A*FA7A%alS7Dy@{HuOL zoXY%1sZSUE$jj8p`;*Sz8Gf`m*fCqQ={)O`X=k&ay*Fp8E@*6V$w{a=c)7UHlk~T|=5>;M zr|V(uKU>N$<$3=MzU*iDjCJSl8CW4# zm*1-vXU?)++8Ft+)akm~y*tO&ZVWkfxTr06p+{QcIa}B1MH62c#@ke+KRo*Q-|1iF z&0lS~V|QHJqggQR+QgSa&e>I;Bk%lYSbX=^$)Yf^JF6x&Jx@Ftc3x@ux_xom_HFy@ zo&Bq33Tux(l*ktGb3@vUJYu%R0-YNLI@aoLv zVh>#`CZGAdO*Hkeki)v~5C1dteYxLhC$i=Gm!3wbc{Y|ZtBHC!LQO*T#2Q6T2l zX6bFnkZ(9)e??e#?Dy|=-=<#rwD)UubWrrId-bLF{#9pQd-KXpv&QX`o!skvf|J8O ztXsZD`mL9))%EbrTbn$-oLraUU8?uVV@>0kM*nFNk1ZwEyqEuXahCky`$Ab!vDZ4+ zy$ZW!x_9ff?W;3CI9;3N6V7*9*mqyZ#LpXNbObrAGICgR|5nlMqf+bIA6Cor{7?@6 zShJy`+4t%?dEZ?d8WXy%z0;f<{WjcS(z{g$o!TVj4dXaJ{b%U2=d>w)d|&$YuA|Fa zO=q9p8|-^o*kPV*+O=E9J!YvJdCc0neV*9Ips;&vrjof|r84{i2u`n$|B zR4lsm@5fArNiXaCbsJ8skGb?ui8uPuvfU3(l$@MdQfqYS*YglQn~6H_HKJ?xmbZ4- zU!1vn_)JT~PS+c9GFplfG9Ik4o`H-n|*Z z*B=X4Z*QsJ=6={-{O_K-wR195ULN|g?(V(3_uDENt&25sK5pB|-xDYB(f!zehQ?Wa zvnwAqFM2&O>%!fmi?*%k$yeO6-!p*o@u7wDG&UKz9ai9R<6P%|NRIi+9`%R$G7kS4 zSkrcW2=CY$(YyZejCCe^Zr)zINoUiHSr4KZRZi|c<>~)@-$$jTu`A6#{jop0ZrirI zUYA~0-T(M++R5+MYt6qc{IG6q_M-E-iK}E!dwqBoYvZ=~H#?rry?0BnsEg~gl&EB1s9T=Qt|!;#EqpLv zIR1~shviN0N-W>3%dZK{xb3=im+jQny&v{`&g)(9+sZ3>`z@dJY)_@flcRK_{CDkN zoc(Uo{qWp(X}3MMKL4SXvcVSKiDT z8u-eS`R9WG)>GP2w~tQams$4pKf@-qu0ZC4buXpawRbuwc5MrvxMg1Zh0Fb2U-rKZ z+Yo&sF5}^}%9*K78#rztVy4Yb=DeOSg?+1Pt3rvptS4>me+I>){~6X-+J8}5`|2R_EV_e?{jvIv z$u)XEYAe@8MXD`bX?k?s{KQP{DSgqaXLr1P$ag?UxMo#E;~eI`Y`M?J=PCaPe>me^ zO|@=+S81(tK)e5?oIUc(Y?o{=_TUugc&8Uz&b#iI+f?zaNo9|A`FF+h>)L7lko(VY zczSv8%YW6bwtK&BV2h6KNcCJe)0v6WQ`zm{gU9S^3vazY39sJhp5x|CRg4?33*w>yrk3#!t0- zUn+)$OC+tGb??LTliIab4**&CaKMyLj08yfBl`(kETQd>3qK7kn_u zGwblj_&3XJyg$r$`H)kmHQjI5O~ckAa(Ro^#l(2l*ITN}6TUU=5&t#|**f7E|0-dpm%b=!Rr8;x1A%Ujzf zvaK+&7rb$%)YkE-rFX_X%|p)0ZN0%yXH2gV742SKQ}^*%`}!ZZV|Hx6p!z|-^^a%b zg1ebXrz*ahc0H0-%Wf=wz9lE{y=DK&m^JCoPp&)tDE{DmiHh~d>!h#!v48ZJ+3VsK z4)z;cc=HbzXPJGx`=qgiC;ZyaQYS%XHqOtY2XlIAoxf#WE8Q^09iM%9=}e(# zPrcViSK02B-CMf%YM=Gt{|sC0_OGZ(wv)8_-t;xBUgB;=&JSz}q!4_E7$`yi(u| z&*QTm-?b(gcRhIUcUI~5k0RZbkA1EDIqMI_-81a=-*!Pt*HzEvy=9VlZ^`MW?-*n^ zXd68(GRj)t^q(Om$8EUu@Z$RXFY`s>oO5*6`*_@rQ~4YfBit5mroy*aS!vTB)87FvR(~jZe$2e= z*Sh`53tw*YUHNKP>KKg?%(CC8F4 zaVzeJVM*tgUbVYhlBQpqxNA!PxgEb9BadX1HEeYAIQGcj^x1wv`?T5n+vl^`L@xXx z_bD-P#miZ_n=`7?W==BYNNqCP-x%6==;H!?xoa%PqzkOKhvdxpZL;>>>sjAlZr%3! z^hImS?9+Mg{`F70zwWQ)t}Rtt-@oNw^x~et$9+53yr_ECUHIUI*z1ss>yjGZv*^u! zJjqbxymUL`#!llY2@c94f4v{CxcKF+UG#nCie>jhujaS5O|)=)RbTnz{xHAe+CRmQDdP8(KQ7&37W}F#DsbEOtxY1@T&g?g zo>bV=bL8}Aaf@3F%kA)^N-9&`fazax3Aq|Fa9d* zR${zBhPimLTY98~=kMJo%%*%gwIxM(&Y!~)>#BY~Xz$*m`uKiZ*gdh2jQv5|nvYEX zwY*b*i`%ZvlU{wQu3RGIH0k)$_VNkG76rIZu`H0b<=gw)qGFmp`yY!B_gnsCFU>Tw z=de?tccO{mYG?{fy3jef?s| zq}ZkT(bd~O-}-jBc+p4kx1vicoR9ok*IK{u_4LIqwN|q6QJc3n=H&W5KA`yrL&7|KWpFo^{x+h&#m)F5aX=Xa=YmMZ}(ZnGZz;W%3V0a zEa>ym|3GFF)Vs3uS6&w4UwK56a-S%56*WBN8 z!0orpZjY1JlNE~!dp~$TR`2+y_IjSd#%ndaD}Pko_qh5_M`7$M0X4#sVe~UU_iwji^`QRXt{5 z7hZ{H%9M*29$Bq(YW6nm9g+#VXEV$*mnk(Z>H=Tjw|vPt;UbZv4hlx3I)F61A>{ z`#1k*==)mVSc^NoIxrPL!TsvEJl|`-j=bOTCS0Ae)q59O>2RF7vqKg_k+4aplT5iEFTYbMe(`)EkLudw=$^y>HulLrx!u6$m%aC+c>ABq!iR6gZaaLO zHg)-Hv5PuYZ3|ar@K(yc&S6{W)oZ_At-V_pU%puV(RN?? zudl!VTD;?<`!Vi%{vZEtAG{VPb8V03n!S^#d^|zckH(Yu?Ke5ZQ+iR2Ik&R-8ZVy?y zVpgoN@0MpM-QQ#OOGWD1-iK4OJ(7f9306e*Eo42k@A-Vc9od_%rEXw2F{7u{H28@9 zrfsuV{t3GBCpMeCMNc?l_x62Pgpbq-b%*TrOzN}voL!RY%#qS*kurhpw4RZx#k(oH zcmI8LUpxNXw=XWcw%&|>cl~+&)V+GYSNnH=$hv2>@58%^57(AQZo9nh+Tps1;yE@VjUe6(f%E%l?< z|1)sf6t2DX%8G6GqrZB`Zv8s%zTv~UIo! z&*oM?`f1V~E9`eg{O~dB=^sw>)RL>o^;olz6!{e9oEtZ111R-*0}de{${Z zvh#~qMt$%9Y`^Q({gV98cWtlzXE<uF@c-ZSwMf2!C2NDloF-u}l_ZEEtWAB%&g9X0jMyXV~<9VokH z^PHnH8%_G|gW+$%;5QWeOuJ=zB=)j_av{F+Vf@4TH?umd;K2&hf}WPtWR+{d?Kgi z;woeFa|TcJeFQ^o%Rc>QX#QUM_0q4&vgV)vGc^8ZV7?qJXIcL{GVAgW@q@Rve>>h> zBg?n2a`nq!^EuzV3#jb>!aOGn2)tosDgR`={RU?QmVtbFMu9g<+Ig{id(STi)!EH*c}=O77VvpC$sCKkH6RkKi;pu4|#{)qt+{zwI-;`fo`s?oc!@joqtD}Ev+n;;= z{POzQ+uFB8#jJh0#++l5mRIA|)ZWDf9Oon>n3i7=;CLT&eRgJ?{Kx67TmLh(*U4Yb zue<%=ueRqr+5Dh=H@=HcRw;DPSv>b(r;*3m6NeJ!OpKcx^0wCO`sWSWw`0$1e>mS~ z6Z4-z-p2F6+1xFWB_1UirM-r4beHu#6@91LopSEerkzI|{;ZgxzBl2I<&Uk8dEFnk zEq|a}nX|RKI@c=uWvQ(7;pxA8!h%#TE^>Yr>!}pm&iBNVlcCaGd17yOeQTX*`{S+O z`=yUrNnXk;d3woyTleaYXHhd1Mh<@na&3+`;_~#uV{|vqV8BQl(v~e*lyYQZ^>6+&ni4$9EU1Qf?{VmSnEMWCOaNbPW?2|R} zA6G|S(zR1Q{&?D@uWBDkuZW#q^`q!ogyI#|X(EML6Fha+=!xD{zNL~gGkBp2-|>JC z{k?g6#2?+&lb1ctwtMBBn0*u1%F1$FQkc-MrLW?_a!hfKwb3*cA&Y}gY}9{SZU3Xa z!$$gXt-t@ZEvuz>tl#p}MmK*&Mcp&)mb9{Y(Jfb0XL&~6uB?6KEfig7$r*e9tp3s0 z_e0h{xgX8%njik3f%(b5ueY9a=Kl8hlYdGpXDVZZ+`^CXk7|B}A3U3UTuy4oOr;AmO(lGS_OU_NyZ^|vUO)>i68#Y!2YA%>EFdGrkSR;VgDIIwp}UZUfsQWuH`!^ z-?hwlPHTDwrW~B=k)9B_Q*Cq7-#PEU{AXyMb-iK1)smaKUthndyZ_BwZNt{46CFWM zbszBSU7utzXHq<`%DF%3qF(3jn)Kga`u@q>_ZQdCy>{+>_Njl<%eJ4++uyqPk|oQ= z)$6qm2!2@}F8%o8qyG%se^}i=PTY|ri^^EcP+`Om=q$M}If_m6Xjy_t z%J$Ws^+)S0KjydFSyxCOesxP{p1?<`IJXnI%gjHXX5BfxVES3XR(0n5X@1sSuk*IV zi^Tdq^b9}p?ONL*XSMCT+osv94Uvc}bCYw=P8FIuA#u*bvPnh0h4%&R#464o-7jh* z|7frB(O+^CmsjL2tJre;KHu4Lonskig7fC)CoyZ(#khZ(=lAhwc>L@4|F(YOo3H<} zKB{t(>*M)iH~w8T{NWssD;Cck|H3?K>-3vzmqp*0$GLWqQlwN+kM#NC)}G{yC!Zta z82_&QWBd3&gJ8xlA3L#+Zj(%dIkxY+vS4d;{e4H14?@X1%KkozKHsBxu%CB((Ysq$ z|A|$c`_J%r-|WXHY~mL>E{?e2y;QGTy*+Kx!?jB%NvfT_us8HG_W`s0leanx1ooSr z|DpXY`aeU99cV^S`N)5UY%Be5@u&aJ-+b=3?A9l;HJ7*7-!?D5{q*tNo5Iq+E$-Lt z{~@yVr`uhpc#*eT*X^wgn{OHOZlB1X`0YP-CzaY7AD(q>{(`gevy<0k$)|qOzg!r5 zaZT`^&db)R9=Bz8b-i1cCv@pnQ3_J#jV$2!j`9(j7#&rd%;zju-UWX2iE^K7dkCRNO{6tS|Jnrh2o>ngloedovM zkJs)mxVrQ1xBU+)c=sQR7j^!ieBhpjXO49I$79PgMd$pQW4Za#!+jIyF)FzwXR;KE zIGC?2SJpF*xwiCoU`5m6!}naDZG3Q+>9gO*((A2T*Gpyvo+$AS7TrEyW}oK{w_vpw zk8WLlC|{s+hxz3!{ey3V4nHud=UYDCVYgV|T)T6=>E2drr))Dk{jYDk*qM*{`6@~q z_!tr=_!jN5f3r}%Wy^cM*dO^vtkUK_+N&J0`)b`)^Y*J>cFH|s-Cl9&nB}5R6O}wJ z*-m=uR1{yFx;w7y_VU`t`8ze^_kQ&|5i4Gs|K0m@p4r=6^YmqZL_h9Z{89N~e)}Kq z)e%RR-_H^->6hL2Udxs5)MPj5XPio{6P=4h4_LmA%m29gad^ic#*cod-F@@jKJ>Tj z->kT2$F;BXxn4=FQA^)Zam#E+(<6zsAzCZ%noDNAu~GieAMt_x&|C3C^W;7jZrHo| zitE>V%YLO4>7KH^9TRh4g4k^?i}KzA&t8vXk1YLr>Us7=e{}c1vd2F4u{AIIA*rLy znZM@F&PaRq^>OWK&87!;v|J*k^Uw3lcxAa<{ryV4_x{tLmftSj+N4>uW8dvR{~4_H ze%rn)yr=o&?jv2{$9LvQ?zvtP&t2iFa(U6#9asKn*O&fXs`B%`&@0Vi*)4p2R~%j) znJ#1Qy!OHTKC7+wkJJ_)j?=t+wE5WEZ`$6mMz7y$*j-fdocy_a#w}A%-y>NYkMB>G z>A8BpL!QmX^YKIZ_Gcekd#4}XZ*=zQ#YGpnvv{VxmF{SLx#O1TJO$5B+zcK(R+smw z)t%oKXLa3VuDj2!jURqT`>K4mHaR-=$+j9m@4K)3_I0goOTX=Z^LqW8kBs|2&3^Of$wcv)&D-nll|7d{ zZm}-j$)VWi{5h;ls!q8II&{-;iBtDlh5pv`0@H!dAzWV|AX~n zrx(2X^fIb;-U8EjzFWT8BD&UIcBf5>&T4*UIwoUKIJLk_&#jjE=*J&jxm+n9wjP_A zy=JpQS8Wof`u5&(-ZK}Ey*O9)oWbhJHG*?cK78!g^zyik1FzH*QM^> zctL%`-d($kXX@#tPWo1;ZO+UXpuwOg>eMZ#`lIsk+3E*>pSM2x(JQ;Y>)qU|e}ez^ zu)mW^I%dcc?7rovi|gJL9>YBXH;(#0^vwGAZvXYUTkp*HzByaenP0*s$`m-Qg7}v*>pUt$xkHyQ_ zxIXmW&k}n!F`cb>b#%n(@^aH9XJsb&E3G-Px&6~ypTPMhl?(U0`I-5d=j!Y8&G(FU z{SaPRW7B=)pYUb1U0Wx6w|(~gu=GgSv}^i;y^EuhR~SoHCaD-bo;by3a^2$xTd(c^ zuG;H`;WN@u_@7vR#H`Uym zUUqrew$J_{)2=`J8M`h%T6^{Tyz`m+RBBxIeE2u_!F-{YlkT`Kx|{uNR^Gz&%9IP9 zCgw%8v&?R5aVn?G@`&FSV&-^az5jZ-{s*~lab>_Tzd+nw7UpnY25+ngS?Gp(zc1G4I zU3zvkM>#s(Wmi+hJEhOX8#Wo_uI(#uP*z@6d`>&4i{Yu_hqHgrZOvYFuXdW{(HHYP z6w69IuHA~5qAe;tyJU8!V#TlGiNz0i{a)R>cV+LAf}6K(483J8=}ex##o1{3p{ALi zJX9klL`;lpb!8~JCEuEMd26=y^)3k}=aNls+l}s+{frah`^gsF(dRGg#vUB9zDB&X z_)+xkr&}*iKeuoDuKqy1x~p7n6SdtIr@MW+QTr$Hoa2k?tmiegk7x6@+ey_H2X#HF zcGKl4l8)W1thPAj>ayp>&bR0D zE1x7TnJmft^meQ-Xd`II)veyK{9b#mtY7*`XU)Uv&ZoDcQ!V9d_wvx;5zA~Gh345Az zHR6bD)6Do{{)xrYl+_s?$W39a0}Uc!!EBlLKgq=Z`abzTgOk_2HL}Q;GF@SOx_`^| zuC4JLe^mGGytqEzYww;bCU5nBdl_&?x|de-?wP#Kk#VAG5~lf8o9J@8 zeQ5prZRzTY)p5;U+jf6!TUojKXv$lWNuHXY(>_O*IsE8Mafoc=>bbtS#zjsnpCw!K zab?K~8?|q5W=)xOW~JzYmJc<@i$vX(C%N%iU!VO$_~3sAHaoQn+Tf2;N*3d$1Io1tv+l1vb6Ncma6Y{&ei&J_vSAC z{Bn8fv*K&@GB(v~;(wGMHfugSZJ+Y=qfyh_N^QNeC7*6qx^?zl*dq>>SC0z>o29+V zCLJz3pn9wPt@(eD`EY>YPWGN!xd(22W#U zxRZT#i+uAufe&Y*m@;p9cDyJL7rz#CveaZ#cjv`RnM)?#*>di+_S2<0o;lAIq*ku^ z!}dYF!$vakp|87x_kR8xS@yTKeakO26MBBGvilwXj;2qZQ&m>-gfuM|@r+fOpS^$T zZ14AzqT}u|Mw;Ea9IZb)GrsQRzUx1K>L26w7s$5f+he%$PWG;Ey&oq>UQXD4af?0g ztLtLZ0$;h-9G~Q!@Km);_x|3@@9T~nV4T5x;>Y5U>wj!l{t5qhM^5Yde}?Tg=8tRD zeYRfy&#?7fyw9v#5hh2c-G1ZqOhD$&8D}kao&#-<_8xz9{ZM@SuhysEkHm|fUH5Xj zmEB~em{+sRJGRDJF51+wwl|tgn;!o*^KP0~K4qflmRmks$6Fnj9bGH#ebUx8DD(T?tuDTH zF0)_myMF2Xli6W!_r3YPN@LZus=aNC%WJch<288{m1Usen;^!>V7) zdrWk>4<{dyTDLxR$%|^`39qDjUgt8cX>}5-Mqh9~R)zvd?W~g4%f3(Fj{`abs1j)VaCX5#XeY!&CsZ27DR4$N=<#T4f(0nKO z1;e6ErTwCrReY-#Wp4i-=lX7I!MACGm3j9{RtldvJ%J;}U3gnW<1woVd|MenOWn|M zdPTddzu=4Mvr?v)Z!^5>mYm&d_i*hyfvP3lZO-rbvd`A1$~yfH{=xom=2ls^?_s<7 zD~02-!h>S&c&w~6()*dBI;S;MGRa@`g`Kgl;vdabdV5!Fo4i;;BKE%8Vzqe*{kyqm z+?CU*uZ;Bgx=#9G`=R-5*Y0zzFY#x)b9QZoaC}pWYK>>*s_3&KjgtadF8M1u9G!0$ ztbfMy_nk}EE^Yt1cm3`4g)f)>XISqn|84${gG>Kdet0%}!QG1O{vYBNKPsDbOEx`l zf8W`h2fgMo-ZGPn`pq*#dYJrv25xZLTIqYdQ|;sYH{KskAKO*mXF7Xs(xKbV!dX^+ zdkxIZ`HQo8Pu;c2pE*NVY10Y+GtmWSYt`4i{>S;l`@wH%+xpl`d$b?ENc6S3`E{}h>hFv0`^x?+`_Ir;=TwvOW9y^* z_I*~Db-&;EQGL+un&rA0*A*|!XU*Za_6b~j&$}`(ry;iYxsN30WSLycr+*@UdsJ+@ z{;0m=%Y6P9##vVW?WW$gdXN2^4@)imdTdc@A7|Pm>l^Gh*S2I9a6~dsyHONiH0vvG z_5=6knqaXRyOvZPTb`SmE4sot-8 z88}tGZU1&_^Nd;Lm$z1**2p_k|LW`!gHsdFg)p$sT6=(b!BLCfYvWVyKU@1FHadUS zZvT*5uckj;|MPzRwC?#IruXhifBc_8?#G0elQW|7TWW0AZ0ojNdUwj@Eb*R+Zl6wV z);TqyA(b)YsM>>_I=kC!<93{^EoZJ`*xbNYq-n;p0ftZ3ukt`j`+{Op%ZoN{?YrqeYMs-KIbbtjUV01qGIp*7OtwD zyY;P|q@2dWsc-*mc&WsyseNeTyIVgQlXEn4wi~k_oZtLiPWt2fHlEWfqGOf>{8;9_ zUij{v`^Tm9XNr`Y=NDWTRB(#WoalBfhsW@Yewo?7H^HAXqy9}Vul@BpE%^J={|xzN zdnT7}pF6EPyDFCd@2p2x;=MlFeuZ*2m6z_gmQa=JM?KrN8RR-#)AP_)-0l?D>QGH`gDoW4ZoM;pD@9>DRMF z->ocNlf6$Nu{vi}Tl)6xJ9kVtsd6u6WAyPhJ@I0`fY86%kJ9!pJ^LVDCNewxp8kio z@202N+}6ImySsAkS}C<{hAR0Y@A5*IRWAIT7o4ovpP;sH-bcQr4}0?uSWSJlc4g+L z3cJt#6W4uZ-xKjR>f&R?JLe==om&1pY-6`v!~ItK;rp&Hd!-LN58gEW#FnP-J{vY% zi<7?b_Hpb+f1Zh6n^|ukpRx9Yijbpa?!L@@Yu8WuzHaxM>ucxT>N-8mcFn)sPp`kP zpL%txH9y~LgCE_8f90*tH{J6g?zVf;q+1>-oma0IXdGcWeV(Op;*FBEQV-@oRGm|2 z`oV2|YNguT#Xr3LcWv5qQMWN-=F*78;bnI}b3E?7qRBqfJY8w`t-P~$EC1C$U7>wD z_{bmq)Y66juEuF!z8rJyeaY+hJkeQg$Nn<}&3bg}UiqFKP6?eemN;i=9AdnXZaVwJ ze%GCs>#VY-{ge1;-f8-tGjmqSws{J(y zLtrmk`vd#dUHgyqrpI5ZaV}i($7z$le$bY!ALo8Kn=35FEwjz(#c`SHnWq-^Xyi;_ ze75L^{)gWW-J8Dt_4fNw{Ybid_gm%Kz3)2Zd-Jz?tdYF4nN#QKu1OYiHmIvIr)=X_ zTQ;vt{KeAGyQl8+wBM_K>FkxO=kCVTv#kG|TYL7`-0er}JEh_e>9@~%-o3R`P^aXC z%^{VkN#SJ)6Bm{#1{ZHy_e_E3$pl8zPwdV6j9y7SZ>&>%Syg_N<4^MHdq4g&cwByZ zr9?MZ-E3O<$|SXD^>8~EHMgB@W-+>;OJ%10Vw+l6S-dW5 z`t6g^XZE}MBTT*>sY1&h|>2JKew_l@_AKj`;v&8YEym>Yj6YVWU@AI%}#uRor2 zG4Gm1pzQL&0L>#iHwan0Fn$lcd0 z%HL@x`GfnA9A~@hzG>fmF63`r&8c3Qw|sZ?=|&Upe38$-&+Mn2l`(ucgJEH?mc`nt zoBtU~_5R#a`&+g4cG)MzX!qCkrmO$$(wpla+suO(ko-M*bg`dFW)n8nOy-W9S}PaM3vfv0%Z8?Ex)pZ}PEW`!!7!ZzLa zKOX<1enz0H2#>}h$jtS}{|vXZ5B$^rxOI7J_@P6uo^i{%dC&Ft-}=$_dHbw){F}Wx z=4WkT^Ha^Zo$tJTPI&W=`;Tm&_ib6v!Os=_ghiYKMFs%J6`;wpZX|zulB)j2Di;7FOk^etk;#Nd!Kc!^HCdL zE5n0^8xxK^Y4Xdq3wt-c{`l0TpLhH354pT?QKrfL{KGTNf5e|&>?_y(VRyvjBl5g9 z>Z^QLxvulATyiVMbLBzFKI?56p*^dXFfHuVVPxp}BKqn5(RkU4eU~5c_uF`;maaQ8 zPxzzU^rbg-x2;ROys-7UUs_wW?t>W9>$jIPIrRjeie2hw{FV3kVSAp~lMmIcLJIr2 zPCk3F@%D+2?in?04GbLzBRK9((Pm;RVskJ(@uT_Re$K7?d&2Zr-Feyjk#(B(i7m}* z_7vT%ov+LDDPqk+^^BdG%EyixPH@v+6y&$xDk?sIR{7UWS+}E4JI~WRHeLJ8{!{m6 zXYc(br@HdT_M>(DKlU#5U18q4|DnqMk3TP~bS0PnXW;lVOXQgAc0uNU9kI!u?)_W?NjAIxXXVQ|dWkz6E7L1&D#~}1{=FS3P_(>a zdhcwz{D@7L{t0K*OQp@zt@poLr*!lE^xty%LY}3%-6A(Wxh7oR* zXS@Coe^Afwzsz^}i}~r3eShoUjDB0ZefqbIWnKJxo?8b!OFUP+UtNXCsP(GFdjDM~ z?e|^ZU-r#p-sah{^Y5?re_FEkrs~QQf4F~aeH1VCqdDM`O}c7d_alzCz4!kp{b#sg zKF!=LL-mHou36z-zYM3cMTmy0r0_JxovgRMb>oh&Oj>xKS*hdI4oRk+@~nSkKWuL( z?e2~C+_rym&@BE#d~|tirzDuDg)mgyeW58h0j9t)*xRB zJ+@_b_3Hl&@t5qPE{n&`|K{_fd{yS8>|)-mM~9<6`uvmmT)*Y%_FcucubrR#`(5_a zpl1C&79QaV+UM5TDOcP+I_c$n{x6D;vghj6OTOAwyYiap>b1&&vyR<6B)wwcj(r;6 z3)a27@$Oc-l8M_vJ@-peannD%I{x6<%A|Wr^Nt@Vn|?jX*DW?IR#T@dc4x(-UA+ra zx-6ZQHtjQ5SZ=VUH{R`s;L<-@7kHm{l<3MIoS&0@Rz22V#B^r$^85QTwu;V8t&A^e zoZ}U_-5{wb^KP84uI!rKvB&Ngb$eExj&+W9Ee-#9)9XKj+1>k#E<11jA^K=_y~H2A zi|-d~+qc=kVUx!GSfKA$gmIrc;Ok?lWbu3ffqeT|K}M|9}s+(eo(q~Fe+)UWnyJV7C z!rMs;{;a)lcF~Xj3`gp@_i1Nc+o!Z^&;H9>y=Mw(?{#|iC5lzt-S}rdckR^Vz6`}P z-rv%#BbSBcgfFYO{4o8n^P|1maUXs5r|;Rhe_PkF^8wf2C0>4~FTlTfRm7G;Yp zgjA+(+9}g>ZQ0I;`x*X3U-t_wRYW>yJ|ZYN5+do>t$?ezsr!cx-s_-c`4z&i~dow>0xx=UST|v-It5 ze)E=l;Pxa(#^9Xe*LCquKdMURch1xN*mG;m_ernKu9XGcm=LCIk-%5L_DFK>(=y%0 zV;66$FS)j*JGS0^-=+6c@16R_sk=PlX~m3VuRACZeLx%bqt zZ`jfPh`%TGg6Z4p=pUs#29eQsvVX1lHGTfd<0^|@d1t1l)oUu%esdv{91;nn_2cORNwX!Egq1>+>$k2;+@+3K=R{SbVp-)1Mh zZEyR-krE_R`@@R`6suh z(#G<^ZT0G#dg3uVH<}$?nmNmVo6c#S`2%~`+O4u-&waLEdXuNGv~9;u-_}^EvQq9Ru_iAL z3Cf(4^}H)|;<@OBS5oo^BI-D7r~K2tw8l>Q)%PHqTYKz`X5F&&DnBv7dxh$$lLcoO zdE~gI@20kZTWiU-&x1vof80&)o_&+h&UUXdEpvYQsZ%GO zr-^6AaAm#Re*Nz6OXa`cZ`-A4Z8F`z`dzN+_vrZe{S$voe60WWJHGecjSLKfBE@Fm$ju={9AYA z(Pt+d4BXA4%oMwJ#t6rjcT7GI|6$#psQcWhd+d0u`a1tJaB{tn{?D-4b;2(Byx*Gr z|DvR8e*TO6y|DOm-ukr~3-~A5WY`$YXR9f?&#tG|e>it#X8kSyWrtO6`IOx&sn0d* z9b`WsbX$XXE^EG4JwLlbvh6tk!H>nx*o| z`>fVxi%+LJZgZVJ!P;B9)|H{g_oKV-j;sI7Dz3>3RBU$sux+2IwerKtue@UKK8HjH z@~{~%vkB+mFy?IOn7HxW-=9GY@4n>ndtbW#ZQ{p86?M7UN6Mns{@o&ghXDOYj`&4uDR;AXCUsYWUKjI&Lo4BB2df<=thj%4@q#vo1y|RZbZ@&MBXA@1L zYNlWGy3NhGbc%mb>trK_^>b%WF%Z3wnfu}GAN3FZ|AbU8CI7IqF)dMX>Y5dB?Q{Nr z26+Y0;-JUhrH}tQ`=8->*KegRh9A!l?-Sbc@$?^&i+_BVpIudvmYaU0-Fx#j=4sQt zLucIRocc5MlZy-cxe&GNHwgv?R)H}h{}~weN!|I;`t@ZE`@@^2%XgWtU12s`aPD&b zJ*VRH%HRFmUV7ey@zb~754#VW|Ks{my3TVM>owE-j$6}a*VoFdn{?;q-hWHhXT6aS z=H|E}@$cnx<;inDFO8~5HC@m5vOsrQ`;tG27ZWePQ999e&AjVSTA%5QmCwADb7h#G zi+=qp!uYAT>xbe#)gK)d=e)1{SpA=Yvum!*&G%OGZ`9oAU9tU@-N`2(_3X?0t?SQ* zysJ?VJ}58$$9v%o|Njh}$v-B2I_B$iz&KuS{;rGfk{bGFdiHo6P~%MCIC-?T)Rm!T z9IfxRTd=aV>Ccgl?Vr2K-%Ph_ zOn)pJ?ET?W#;+-yi=u6h?S5E3OWV4WrPb+P)XqfFmy${4n+12585n=n6=4W;MaB5; zXS0d^b}BOZTjs|Njh8IXggr8!$gQ>gR?{V%nsJ*s?{?x7U=c}bt)^od20R~RRU)aZWPe7H{G!?)e5UIlqR_|Nc$yE$`h zRK?UsTPOQYPTC=CQP!QU(sAn5n-+t^a+$8L&+I$x`)5}DD*vnH(zmC6^{sum{`2;Q zpO0;Q{OzOpVc+n`{Elx?em(OAOf@d~rXTrkrMhi-Mw6rLnyM)(C-Y`%D@lBkaAV`z z8uan|k-gzz7x#(Wy%XpE;6CH4IQbnrEBClQ?mo+I{_s&mP_Caw%A_D=y%(EAgqb3n zb8p87$Nq`=pf~^FTl=_OAD6eU-Jy{jK+4E=3y4Jh&<@PAoc`Fmw#${bN z%KJIS*QD^U*c)~g=N+QSyUUq-=Dq(XG*|k!+JA)G9m>=s4_HVK4 zFv)d#b*Qo?`)~7#%mF^r`e@R(yEog8PP*!t>a<3}!{Zg3<=Lpqbt)C!ksp2E zx8>LbF1lYa>7;jT#>hj#A`?>MeeNksm zFPlF*`|8c5pW@n2NAY)*#0&g8UuVDL%AVYnuj)lpKcubtr}E+Jx9v7Q-93ES z!#?b9w+UQ#+w0rsWwRxB2A4lyC#9(4c6VN=!h)&i!t>l0oO~1;9r{Cg{qY4Ahohz+ zKKqf|`_dlUM;WD>>bvIqPWp9J<T z#nR|)J;Dkds|B7qZA{pCjISzm*}JE@v6t(AD%aO*Z_g{=dnjyE;HB%^|M~0>t^2cj z<7fWk{~7pVez+gKCD*&?$L3>Jr7dL!cUt{LUI|%ynr&SkZCNRoRxTHEX$6l*?2{EN zJVkNxAMea#zp8KJ`Dkyw|CT8~!rr@oIJWb#?f0_vw(Hi`OulWsWyK|5740;G6n?b> zb-7Ezug)|2v6H`Tt&ENCJ zM0>aPe+J1|o6om*dOXoy^fQ;~_J*3ZhmCgT7}zw_CduFWd`QbCo%i1S)s8DFyuCJD z-O63kWhQ*Il&6R>=jid7Pk(M){f@Kg^egSl$B*piFz@kiY46(N{Aj`Mb1kM4b$7St zz4~e2(|7xIf6Ki5e0l1;m)W=Pcz?VUfA#!i8^yH; z*VJVH2sPXI=&aZGN8CTQ$HgWe-flH#($Qq8==h~OqMeTFPH|DXaccFm)zege^>2NB zc8}t3mp`^2&mZ9}J~+$0t7P?hfy`hX-fX#rdGCwvh0plsrPDh9v}E{Cw`*bSOS68I zKU&xPi2sMzyNB0p?Dg6o{@k;HbW3}Vcesn%3$)Hu#2%MCaI{AI(tieB z@jIEiY4=Zm%>VVp+DqFkUg)LR_v~!F)v2mfe+k&u1$3DZDjuSzgjT_uLM4ll6bA zcGp!^wf*|f5FP*N%9m5xXaCrL{C;Rwd;8Y4;YWn$GxYNPXZUR#y6t1?(XERkV#(wB5+?hTE*;AwBkXA z)!N*XZhvoJsr@{0x&6`eOt-Vjq)P6YeE1U8wqU}^^o(C?n4+gn{~OrL{q*pE22O@| zZ)cVL+m~CH`S;NNuix#j%I1omspx*x-dv-&;`s8{_jE68@&3u}n1$&ADH_nlocmI)iYu=w|`__bydi~~|5Bkqecj^15wzt=S?^%o42Ls2GvM+VZ zq%RjQShTa2{ZV_1O>A{}^27Hc8T*v3tW7fZPF$K55aYLs&nPthspMmIX4}`3f0Taw zcl~($hiTp`et5@zFq`|ZbIGR;lj|O)=ipN><|v|D61)D> z`@fat(qX^fezs*kDmAZraYfy~{|raM!^Es)x41>`p6ln*cT3~A>XP(Bt#Zq|KGfe7 ze$?N*|KW^9S9AAxE{n^&C-?8-{E$h9p61mZmDo8i|DH~2*T46CY(|bO%(K2}$9MhX z*mrR&Z`|!;R;OYz$xz7H1m(@m|Kk`##K3flB7rb{jqxjB`=A1a0=`r)Hy))$^w*Q>utH)x@8olIc)qsxK=N_7i0T$%k;1d55H;OaoK!Ss!fGeYhl-0d*46ae;4dw z{V?CrH#+{K+HBF?S%EK|O!se(wD~sAVo8immwoB8_6g}4jh!+H-)@QRUskc$W82k| zT(`^Nm)=dU5#J`f=EIqt#{#cRO`K54zSU>Zvf0t`pS#0qzfApYt2c{PU0VJ2M_=o8 zx1&$T`gf*X-)i6bkL!m+jrSu-)qR^jOqUdTVpM440BiiIQHJhyW3cXSAF_2_nePeaw)xq z(#F{ zGDkLDGFlNV*jW_8KcS~5{9IU7-unE9>-%r%{NwtlHYae8{fg|MS7(!Lu4k?1+5XIB z>(1H5XXWRsT|M>o)FeK!eSg}$Hf_I>m;Y$<_O4$qg)HAqQWZM2a(B{h#%bI;H}Lx& zm-TgDmzBHd#rNqilS^0b-QugUd82FPGllddHU%+THV?T+ORFcwd1?P=U<~_{yXsZ= zQT1oHf8YJjz&kzp`TU!w3^CR2&NBdc$eoQ*-f5l`*+Pr5s*KBp|%~PDjd5=@{NZ!lC$L^PQ z9Mjw{_)qobAB!!Q_M9~jyk6=X^&uj2m)(SkrdOZbmUCMD?4(SVn8n@G)~g!!7B|Ga zxw=>U<>h_T-ahk@kGsDA>(jNrf6B{c6=ZLZ-;_3SNoE}PNAb=*?7VGzYwK4Ww(T*B zJ1KK{ws&XJj#+os?pbr}eU8TsP9aw9>r3`tF0JRO;D7l2NOnzymu9ZXeKXnOfbAZM zyUlzv^qx&ko~ql>{4`wOB;`cof^h5h9cO=gXBbyhAKum0zp$eDNY%6*c}Jx9=H_zj zZ-3Lf>GRphrF|x!;yDgis&=*9pE$2ezHdK&f7>6^b+JE=yIwN+&miP5`K5cZXzN>y%f8?y( ze+JJFCg#_RBVNmCTt0fWwQbSDyk$4JPU}5bC0?|e=>|g;-__gU{Eu|KAO4&9QTy=b z*Rzgwr#?>GHtV$e_I+1^v{x$4Tx_&PU~})&St^pD>Q9dqcv$^fek7hNqfTwZhi8i) z)k^z+zQ9&<+md(F-u|nuJuqEs^NWhO`5*ZYYX46C zG1=+TpTLJV_Ba+kw8&{~5&yt>?{D6_l!~6j?UJD$WiffolMLf>f9tH79%u6-Fgbly z^!-~upA?_jeQ1)o0LRb%+gFditQQIn|H$9F zCb#6|yPQ8dNln?^WuH}d?#f{Ks`V=AHMF~eZh2Yg#HwhpHn)Xv7Fdl z)Y^WwY40c3%KX;<3>mTfhwb=&bg%uf`{=cLfqk+SdC!jjRz5l@I%-jTjF;uMNac|2 z6BkcE#T3f5`dQ79oV4{G{oiI@c(dk@-#_IK?>qj;*Dr{0U3qp{*0&2=kDlGK?&6UT zwevCqPkSsi6XTI+IFPn$J_#oWibTH7JN^0U`ni9uN8enq>(bx*kGB0@`{Y-&^K)JK zhxev_RDLM7|4~2hkI63AO?BP=GX!5LUHp*OP5ZTX$d2vZ=PxhzxOn7Da@*Z#ewLo2 zb5neOSp8G@(OUXZ|C`&7R^78#s-~@b{M+i(vtzHcXGi9zM)0nRIxQ3UMCG#E424Y| zeM@iWJ$}DmXrJoM8k5Ugb}qkMDx3G8A#A^|vEQXFOM33a-Z8Rn3D%i-Y=KIVVR~tR z$ma{;oR7}m5ON9{jD%eT)5Z@t^O>}C07-FNe4r*4^E<$HEX-=;l_Jg1o&+T@rk zZ}a04J7Ma%Ht4qN%WPd+(Y<=>zKX3py7%4vweS8jh|OQSB}#7Yx*xkeKh#87y`I$* zW|UPv;pMb8pMU>eIh48Wkd4m#v$|-{=A&{(AKEs5^ge9Gwsvvc{j8|eH|1hho!sA| zyul$7Z*_ucoO;pva}pEhcpI$0LKZr}X1M;CQ{ znyde<`@+_EY3BvYy4N`;7Osh@sJnh7*qP(GuGhX`XLs*)o$9qc*7}FuOJ-Jmo4I7&`fuDjOQvOa+;i)*xcF#)Nu=x~-$iZ? zE;+?VwO{AI-v7dG`oGySR0)*|SS0GN-ruR_dkg z=<=Lo@ucmWM`OgIObL&dt=d;Mu)SNfGeR*(l0P#w zg}hDL%P3p%crEWzakVee|LP~G+?f*g=D~l4oV(SB_sRdAWvAf%NY~lta?3B>w`-eU z6}yYiEcTCxA|>E$owY%bbFK8P*$|H$6F?vE+!y45Si z#ht6BXS23h)?L?oe|PJyw|Y*zueV&k7R9zR^-(Ri-!+q$t;goCh;}c}HTqI<*IBzx zp=*DzRlP{+;@cmlMEzW8?;quX>#f~{H%H|df-pykM0NlTy>Z4v#;IiFI-XWel5(` z<<8;qdKPiU)GdYw_ZYJs-hXQ5s|rceFSVk3p1lewu8pp*oTB{Z!Gc}d_3=-C%-7Gl zFJveE$NTT1E%BT;D<&VhWqZBp>&|@D#`U=muYA>wT{y`&pT)YRHN@+z&WxWow0It# z_#EFj>wHJT$Nq-=^pEXbrovSvVLP^8*?M~Q>Poxo%d`9oXDpep^7L(o1A?DKewMw< zyn`ea`%nOP}uDFE0PO@2*|^->H96w%1u+v$0q23_pJT z?fB;8 z=AT!-1ez@8^VhDcFSxwr&h+a446)~@yeYYzX`j5l_12tS>$(@7erSH_EBF2%p6`6u zHs6c3pKfCwoSQ6YTshzF+HIwkw+g#X{z!l9tL^J?e(SO#L?4+VARv(?d{TJKr zv;?zm`Kb&OZtZ=eW-0h2a>HAxpF7h-rQ7W0Uw@ywPrf2O@Jh*fzL;0mDcRF6n`kHr z-#M-P^bUK*g^fCqGiQ{mTo+$kchM?r@&|3dA8Xm${`KzarMo9(Z|k}jyQ*Qq+?gqXTeLVG9`IbBTz5liO_{tv zO?tlG+C@K7vh0|m*^epqUqU;%&4NQUuW-4WHEAHa?{vq&FA-1B^y62 zwN8qZ7u zKaf8vt-WRMZ@Zamz3p%2v_5freA0Zi_U&lv+ewj2C%v2xzNhPE!9u6&YkzcqoABX# z`yTJ|<+9q5S4{R=MO&Eq3r4zfXa35W?#RQfnHd&#N2Ry3&pbp(GikwfKRcrz3_luP zI3Kr@wz{!POx@?-_3ghscBoEqewu&xnNW|}W6R*iFT^^_~BeHeK4fm(O25evSUB zmmjkVPu2yA+?IP`KFu{WuhHRW=c^<0-fdXPpXqyiSK5d9s;;UR7eCp5v*_LVxF6f) z7Ct;1bvp7w&K}YH<#+97r|B-^?d6@}IqNk0e>w&HIpLxcY94+bM2mo{2)Q%G$WOO^t}-Jex{8T$Dt>;HyQAhSKi4fC~(uxJb$Zb>!nM-pPqev{c3*ot5l9p zu{HZ%eEK(Uu5K>Bz>mz2+=p(JH~mw(_oMrfU;o3)YbrK*>h4o~*`3Qaz1IGwZs(+v zc`Y9naLn^&t1LJ2aGqwbZ~pK+t9RmWyNdE-Rnm8E-+b}b_)57E+$2UnNOyY|kb zruEapN}0R)?mce3r+gZ8YvVthf0Q@>qrLZ)yj-39FWq)Uixa<6BBu7(Q1>t zr1IW<;cl$5Q`%&{uG%kDr*o^uYR@H;Cl|i)B4sav$}~F|K56;ny(TRbG?7|{(JBC{b!haZJN#EBlooCAHUDB zPx9_@o9VNj-tRSe@iuSIi)Z3&_kQ*sxo}#fP-Vi_E#^GyHZFT{LWO76FSkA2N91Mx zXo~h9UN3y~;a&eDRpA}$bQfJVy=`qFWoxCXP%l1Xp zzARH;mA~oFlk2a)o0Xdcx_UagR^~VE&!}ZSzE5+@#kVGV(~jK_@?L3@8~2N2+WGM4 zbWY3d+R7HU{M=_voEEd`UEr*@HIyUN&&r@Lboj&pOY0G_m6QEcT8qe}jGJJ<^SK+qr$acvO<3NU*QT z5})Lfh~=%vCw(&JWV@QgA(~#c>p#Qc`dRz)mMlLrIktZNkG#M9S&Me+ch=eYx629E z3!YB@cGv3KqdV~i(-XIEs|;0{zwnN7_`UF+Rr6}A4ip(#1^wH<{`DvBZdZ{2W1+<} zWv_5N`83~emIY}ogsx5{?SliIXaQyTu_TrA#eK-H? zj-~l?!u};syK_F<AALbjMnE#W$S|?vI`Jvl_eIg(AcCU{6aXj?u(@UnO*Q#Xc zykB~1-N(N(7jJg^>HYSek+;yztT$`^NmLv^T+d;{5`6gWiq}^9-Ag~J9&gXddmwZ; z*YxO}`DtCAvWn@QdQ*yCEkCUwH}kEH=f}f`Hg0Ww|3}%XZU4IHAJbkRt_q!F5XHTG zU!SJUwIUcTp;?sp9O|Kag zcTC*>_{O>$eGx^jm%~;p7F~D0=I-@>EN}iZ*h;I_^BdRIP3sZ;Gj~$<)%$J}U#9K} z*}vsV)TuwV*Xtij`SQGozs>a8_Y?E%rIXKn`gi?#WwiUB&-ecwE&eC)fxmal`VOmW z(-*zG^}Y2Ef0b$Fy83PJOm5Gvc=s-5m;Z7Tp6lMb+$Xt3H;FK}GF)Gp6)*AVqD_AC zf~ANgjdu88k7I(@|WcutO8V(s26Op;fPpjI2KWGyc2M^tOCU)Z6>*dyI=SAOAJF?6zau z#*ZN!6S4&F$ExlrUeT1~w3*ZGn#PL*c@7q4&u-`btbKcOdEWQk*&nJVo?7&1&;8R6 z{NwI(?SC{g=s&|l|L&Un)vxy{?f6hLi$!XGca7-^UZ!8KEbrdZ3_Cb6u)GI!(8jazTk zJgv-`x8Pb^^Y5_8`!TP#Z+`jd?n}?yg|hPgmlxh$5qtL)w_a{XrKtJpW8#w<%XX$6 zIPog)KSL%jv)_-}6&1@qyIs$n?|V)0LjD)yT#;F=Yu{{k6>*MfIUy z=R54TC{5fGy{4k>+ac`}a?a20a@1WdX1H&&=@iSt{%N~AShqbkeiVObZB?y#^yI_) zd6w_Hy!C#2{D;mPKkA}abKG~G%5yYn#+HBa(+))(%!rGTV2|T1->3g~h3+0syZPB` zYivLEuKjyU(s!2HeC@-MJDYCGo{AGue7dvKAuP$ro0Z}5rG4^0Zk>Om-;!hFn4DXh z&U$^uvTI*M)0;e=WIX9TWUTkJB-J{yT}H@qsm7{_MK9I9eJR}ZDLni1vv+QOLf&O- zf3A%Exoz_2c;zSjo%1)_AF&NTRA;dF!WNUsN2UwE+_vr8*=nZRZ3nlU*3Le1CqC=V zth*_U9VfR-oJjC5fBc@|PteEsBWJDLFK-nC4M$2xWNqM9yQFw&|K>*}XU?4A{d}kO zoEiA`kALi+7TDCQpR#T1ezyJUKa@}H(Op{6eCR)e z%$`DDHs$C+1wZ)iF!{eBXHMWm-CT6@V)n02B(c_sayZgA(!E)uMqtOyUhFAKgzMB%}t2V1@*_!OK zx!2lHM1{Zk_>ud_ea?MKFXfatQy(cWi~8a1ICI^i$!~Y1PM+>Lx1rRzB(|A7xIw&V z;?b~==?CLQe@s4Dr*MV8ZBKRm+Iv~^WUG7agRYi%OP}tJmOgKKcFC-g3GTvcavpem zT0BQVx~%KBR9D`oEdTJuKbKekxwro6twT3W_WXOcHh+@Wy^f#OhvoVHsr(3bxO)5W zKiO+vc+~?xMrCQImewxMny0pXt68+_?6>B{zATfryqaNjR$Irl!A7g)ga09axgXAl z$Hk`uje)k9FJE_&$7@ufzJ}L+sWAZ4bF-ysdfixjC23RQkSz0`u3YBA2(_ zo%DNW{ZG*!ovWAZt(VLH=lM^r{Ac&$`UCGpYVtmcH=Ab5TIF-OFMBboXMJM1>6U5l ztMb+?+rD{UpIM=f(NDe??zt(atMkmlWQKJub9nvsT`X zbq`XT-sLb=+$p?rVT+fo-u?U4f4~0`pZn^4-L5*Z{|x#^e@34-k#GNAc7F?h*Prmp zyHqoOBe(3osd_8|Q7m7pk3@9jTV)@5JbsjDt7 z_n+ZUzxtE&{r?%#;`x6}j=Qu^G~WH8rb(^%p|fl6o?P9%I?XpA#?wf}VA08EOlJY*ejO*j?_AdQWev zy*O#Tp+WHT^R8d*o|kk?WbLGRjZWS7+t{vTo$WDAW3tNRtsx9Xo;Kxb>w>(lfA+Ii zyZl#t`ou+(eN8W}thxH1;q1?!^Hu9E$uVCw$>o2@k>O*dB>$GVmU9T!k z)A%`K<}T3Lu zl6YkAj;WtKg7vtL>`d@uUsHEspVH(*@f=&vCfO?cUW+?%`|Oj~fa^-9Qk((91qHQ?BDXdWBzlV~Ys(AW&ggvnI^#(!!!3`ACex4UZ!s3> zd}6R6x_9?<>nm)Bd-gv2wNm}_v)296`Ahf8ZVCwO-DPt9r0en@W!L_#6qkN@yth=p z>7P+sJg;Z=`nZa_HQt$vw!Yo%ZMvmu?%qqwXQ&wJ^vdWMoMT9CDm+`uyD%$UYTv}{ z%JQYLFQyCYJ-__^me@Sz`MaJ;s`SkJ&!D~G`P!PB#lQV3dg~;Aq{q$={ZQ7Ny7EWc zXOHQ}tL7BRa@=-OHMr+4xWnycK{Ch8`5EfgcSO%`{X_JjDZbjyQ*(BP|MQykM`%~o z{Y6*w-ycQ zey?)v)<3cR&zj<1XTIC=`#;0OeQ*68R#hGSaw~0Wd3o;gJFl(_xLk5iOuMLaorldO zBelfg_=0<{_Iz@9wk~{1jpfJuZac+4HrM|+FTI!fy0&&ryz`>tD;=V4UAo0RZCmUk z+x2P>xlcLfyqlc&G{9hf=+nP7{~1nXd)wQ8{$paeKb_-G;=}xwUq{=vK9CJ}zP9H5 zSL@&2vQEDaw*F_hrT0!R$z{^LM+xmhN`{j&cq;C&W%2jMaa{kWQc?c!-Tu-CzgPd> zYZj??ZI5#5s-wKuCzmGc?)m6k`pi{wlG)U{d_!i|9D@^Q-?LPFf5hLl#lIseaQrg>ltq@P7d1cv0h9j>e7$E638N4x8NmW;dgP5PLS>5d}5 z-zA^9p0mhseYNlV&mi$Np7F=z!*S9V-ha^*`yJZ!=v!>~k+9D(x1D8*ZXc9!(pb{l zecJYphIE}so{Blsj`LD=*R0mav;0wgG@s|iHUCGxao6q#?%TL8eHyRT^|yittqt9R zj-2`zQ`37~w@I+o!d6`GW<9 zKCoZRCi{7X(c>dk>0MuE=X%Kt-j2D&vZ+&dLHns4o~e=bB^Nd?pE8O0Sv}`jg&z-B zXY7~Q%Fp|w?O@KArpdD<9$eOodlfLxf4WNE(rK^Hd{$Le^u6#i-tyVrB{B`QwVBQy z$Go%aq%!_JeYWeX$h39(%RyV;<=uC^(5=*O*m5`dz&xcNd$}^USIv&is{b%CGtp2g zzH_aTGt;Rj3KP6@UO5}wk~1oh2w&qNaqJ*R_`zEdDa^N(mF?$N>~E?G|EO7K8B<|> zMCy5~jpLHYjOO5d^W8ptJvY~M%ciqWZyn#|{kt{yk#(DLhJxGvdrPK1xm1>GTK{ME ze}>%XU!|J6-QI`Hs$c#(dE$riX4Cf_bwVG%>=&w&u9$cFXq>{v_^lql+GeZ>Q#zZQ zynK)Nq!~wiymcnE@Vq&9Bt7e04dajUN9TDe+8?p^?s4YZskeNcb9!!O@+0rpnx&jV zS5^D7{lpZJzm_nRAsY zn;15!J@1{aY{`D)j`HG6)z6vQ@oR5|S7om+%-Okg>$`tmzkWu>osKoR?7Z~H*@y2X zYJ7hbKd@f;M{|Xvjn&j`%OA;ey_7m*kmb=NXmtC)XmFIkIE)}+Rb>=s5{p5m|`79sYlp|-{{JF2PciA-eKC#a2 zLK4ZB_i6v}pYQdPeaA5hDt=qq^t$Q5)tgHIC|E!|;*>(PYw~GEVh}KEmukrfuZt9~OHKway?$hkD z$X#+t;$?GS)Ah4TC7$JbjEoiScTFpAXg-y-IQ15Td$Yb z|6t2hn;g6LKLd;QllMG-WIpcgKepCBa>s{ni*~ix>F&RpZ{u#ao@bWIt(=ucZqvTW z6=tm4v#pV55ld3&X`fd|%XTJTtR~GA>AIi6UoogKS@p{)E_mo`I z^8&BlC2jp2?YgL6FMqasSKj~p$Mlcj zqkYUj^z}RHM6*2*=Kch~+^LARSwq)s5>p$Vwn7%yBuKWqBi%5^a>;#hixy+5p5zvhL}kLJK#doHg3x*}=2XU)=# z+_hfbPfxZ@KUc;rlzVs5p)FJXE}1j$?Z4iC{qHt??l?S6Q+LwepugewSAK03^>)3o z@R~_B_wVWz`U$(5EFbUOYT1^W%3>umXC=Gx57cP>*q{Hg z?8vLC^mfy=_Kvgc&5lQ0u{-@Fe%YhlizAj-3NI@=HF2ADsp#F9JE}{!Y(7;e;}P4x zfLX26&h@OWz@^=rjQ%9AjLWo>yrQ?|uh8!!;hl2ImrY(Ocs^Ho%ys#+NV4R~JFgE| zoIGG}5&fh3_{D#R&OZ?Qk-onEQ1X8Uj>i=Vnf3XXHl>)g*>Cx|Kj+h*-sAGsXZ|xR zt9Z7ny1r+hR{I0@#ytU(ZkX+V-0OEq+w!YLar)B3waPY3EPYrvndq=xF+Tcf8f zdA!u+UtP?${ddo;f4%HhNa|?~&ze2&t84$PyEp4y`LucJA6@q!{>%35@`3XI47Y{+ zb2Cd9zLIL4V|>dgMeww>r_81!2ZfDV+*RAyc?xV+f1CHib={9g??d_zS;cpT2Y)o{ zDA9Efxb)SUJJ;~xUG-_(Puu00EuX8*oVELgv0?&m)%An%vhja>euO?;FOpSNm05mk z?t*#V6*FfUpJehl%}{2+c-`i0;jBmh88YMhtkxg$<+^L5pPIVz^_vTKW0w9XYr1s& zt>9smb%ImgrA-ZFbMs7l;&y1=lFH?$O;_iC+8j4+cWrK6Tgd#)O;2yC{;*#Dx;pyY zU4M%o`+r-$mQ&nY&;29q{86!6X*&fQy>U6KBh(qH#HDs9217qyw%Q|~+OV<&rkE3cNX*Ryk?Q)8cgnq@6|e%=9> zHH;yr4bx3u)-&4_K3Xp>FO&ICKUXd``0%dZncildxw?CV+`mOv9@JLZWcDCkVNOF? zA8UD?W^Cpc?t>3+uF?HEY4Mv&uL^Cpy(%(_6`VRF*@jK#@ho4BgLZv}zt_zD8+HFm z`JP$-zU=$`&-3kziu(um^HnT<=zr)xgY@|&HRf9{{yFRSIPk;$hFrb3x=Z@hx)h(B zwn<&!J}GtTkMs~e@j?cM_kHvEzs@Z_HjnRP?edoQHu0=&^UIgtdr?)kQg-Soo5@iz z2enRpc3B#fZ5l59GyGu;)AHGS|E@57ZQppGyY%E_uWcXedzXsc%gWqdUN-H`#?vbO zvU7N@3(s-gn&^JVyJrK#wcg`zh1cG=^+P)-daiul~eof%&C?Ypq_VxEsH zEBSB8i3B~fZhEWCEcCE(RrV||^Q)#J{~6xiuG3!qGt2+bx@+I-tN+U9&zojwn{6V(Rr^ki&L%bo&RRH+o_H!Ig>rPznyA3VJZ6Z%l$n| z|Ec|zU0nJyyFOgg+Oza~(AV(#-uFLb*k*oYdw2c#E%rveFO7OD)!WalU2#9sOgHCd zoW(Q)j&mx-dgn7{?S2+}%6-LAT@kO#VUzBC|GE9Au3fmQUaZ)yI`7X_f8NwA{9yk; zzSBnYaX*v$%8I;PrP~J!?ztDQJGpY{5vkJ>zH1k5Yw?@94<42S=|`W_qf zIsBt}%bwiSbrmf4qE)7giS2USzkT}evNOjP*F~&yeU?{pQ&HmfNrvmCuA0@SE?xJo zKlt_Kt-D-)=U(ssve#bT_?bR)Mf$P-4BT7hch)I>us<}jguD7j&Sb|QwQ5U~=D*B- zrhD|J;2r6V(3Xipof`Vziu|r6M%79GF!^WvBd|Vo?T_%tOErG_ORqkgeN^4Ia%-&5 z(bkZEYP}TjjsxR~Io7V5%v|&r;&-)*?KmEDy-G2rH z{|EjO+uxjj^q+w}?nU^-tU9F~Qogf%Sy#ug?21U5_v-DM&w@AJ7W_-J?#Y;a?o;uX zCx#tK9tTgja&dlJelhIs*MD2$Km2E?hQ1+NBL&=u#S@WgNSFbz1TIGHIvgnta-)%3;omaKU zQ|*J*+dC^?aTJE-{i$8`a#re_>tf&TXKdLcwqAPuwCEDWs~a|11T5WjXa2;9k39kG z3ogbS??|jZ>woy#n*R>B(7Q4y2@MT+eG8zE0?fo%s$g{dvn8) z_Xkh-?T|XZK&7OLil@u6P6#qik9;E&aRhStp=?ni!Dc*TCxu|*s8Z~f`F zK7CWm^SilT@n%`~sPrT6+l?w-J73QIa@I+fJNVL9@7%akue|-YIBj;id1NAc&#q0= zf|yr6SNIsS{B5A^ts0}tZ{M2gx_kXD@7!X|`OakZ%QfqGgtNjvp6WKy*`)TQEGV?` zpw0DZv47I8|KrJ$GyIS@zr6mDWB#MP_O2K6zgAbD&B~T|SMr}>o7bi_mOFNTYAKo; zd;9#1#yN}=4QpobGhx;lFb`cCYUo6P}N_t>t@qa)%uV!s99?90(Lk$w-K7Nyy~nn9>`#4LA9vLzJj*73d31&Qk?X%||901YL1q5Peubuerx7N1D%tr4lEE%rbe~dn|&-{b<(evUz`dxNh(9#N_A@^h?_2+5$-lp$Z+}G0)V7PvUG)30{H{}{{>_bC zUiw>A_5M+Aeeu(4GlkA)K1zOMvS;?;*J|rtmDexa`rCKDV);YS{DsH)4d#T7g(ciI4pDvrPW4=LQr%Q!g*xYL_`qL#`lpD_(73|Ufxc^9C#rGpo z`8{%WU_dVruHAR$v^c= zfBn&Zw|{1?rSIN#myJ~{q{-x#X9C6?H)fe*Vb6&OGU+07{v;`cDa<4E7JYi z;)%0NTIRot+X{tb7w8mWa4Y4+{wqL)_2&Ho)*<9NkRCi|aSKL3^trysoj&mef?B!BBxZ;6tXk6TmP z?q$zj_%tp*T*Xts!N<2%u{nHU+~PmXKdj3i%UVZX`;)jXK03ld{)pAxvv-$#FBf*_ z@>5RJX3C#qtivdtae%ow>Q>;?d#~3FyAvPD%2}Fx)QNIEyV>ha&%T7uk0!oTS9R=ZRAk)2QnKwo z!{)DN?g{PRc;%1R^jY6lKU%FXniVz2s(v!ho?yf-1=#s`nkNa zs(0zTx3{lL{#HAa-1~cfJNt?snGgDZgx3jnJO9rA)^>XL>Gk%fK5c$}*W2#y?%D5t z?k=8J_Ppmo#rcjuzU)Wxo2=6N_5`o0Xm>WhbZ1-R(hB>~U6z@*Q|_PF2|jeuA-Uq3 z@(i_P&2UK#ri4jKK_xG@=C}Q-TCTpnG<)CmuzSZQ_S;8$x><*2~M$?&Zo6?y5U>ITt)V_0C0S&CJjTx(T-epVhAaSwC}C_;juJ$+uRlpQXP) z+xzAo`L#dRyI*{;?@~!Wcjm9&quZxz`~O{Ct#{(Br|%JthBIA)^Pk5Q8C0Kd+%HrU z{Mgs~h*W-O$xqpmA8(g$yEgf-RbOj<>9e1W4aIv&kinuZ4F)J??*6g=!%aKwA5xDGKl}4?vh?ie+trI1 z<_cw3KAxvAJmp**9vp;mt{>xi-Jl#~{`8ay^ ziQGHK-cNPEt;||{XcB+clN#P-aX+RX_-Fn>=UUX;>0LI~^^&tL?o-GL=5CwDGEH~Z zwu?m*4f4VhZXMjPBw=R%ttnOZM~n9VbI&c=aNp(E{dbw`Ki_w{y8O-Y)tTN)vc=>K zHpWXuyx!He=lb?4!Rg+a`O2T3xonDVt+=8Qw8SsM>{I0S4eJgcvgi2P&rqXxu}1UZ zdvR&*+qN$8v9I!%xhvgSe0S^7Gn|aM8y2@$TxXear6*5$zVe69NA=&_UY~VeaO>T= zT<3$^f35P(U0AU_c>9j+8Bg=Ij(P7;Y4SeCmE>Bd6yft;$H%RpI`=m#@jE zUp@72?#}wOvHfHB!|<+u+P7ZW$;V4Zyp&_V`qp*c(&fB7XX`#ra#n+JL5boU(R3oN8#h1{|r*X$F|Kgxu9}s?ZduHhuyF4F||Fjt8}0ETPDs+|5PK@ zlxIe%DX_+c2-I|cTktWwW38P+#lbDP?nnLQ+y9uRSBqY2UTM3uz3qAC^7K9XTT|k< zxm{e4uNm~9wY7bkdE1J820u<8jT87dZSNya^^db(ZG8Pp>qqLA%b*j_zWfSVn<;zf zjQC%X-Fv1hbndU6)_wZ1sL9tUxqJUJn4kWazg^#S*P>M!vugLd{AV~F`#$?i`m*D{ zb$^yWdj3x+=h0vNvcJv0cRkM9UHt4n!`3IyZ`awkZL_idAobq&$CdvK0{LHK`OinJ zTNrZTkNw}dr4^H=-qL?(^Zj@DG?Cn&!tC?2K6CTm@_*!9@i1{``=PfF53gRkuuYr0 zb@wOf*vVep&!n4Xe-MiOZFpGko-@DekIIkPM{b=zTI+wfPPyh|^f_?vjtgIxY&FkM<~?()oS!q}U`b`V>*2q5d*?jO zzV`Z<9aDt7RE@^W$bR`!R0a*Hgtl z?;ofAR&N#b<~`25@MZM=O=rLDRd4)y*Z9b-#$DG+dh0$N^ESVtdL`xZV%9kAv^`Tp z-+A=8i&8{K?uR3<4t`i#$-^o7e4ggt_8ReZ zf2_BEcsp(7`j=KyqH_CY+}gTtd%e&d_gwuM%d#W#l-k@>ms)Cn+S1yt5T5zlRY#VcXQ8m`WC~H6v(XYt@iY1Xs6>e9kV9){u-wb_m9+b zzW68qaeJ@Z+DEnOhwB8k)eAZ<_;KsE^Td2bebu|C9CL!S7jKr=>1lX-;B{BW*0o>4 zf9k6Ii_3O>ul8Mg>XIp?Ww%#f)t^84zI$$e%#YQ-ZC>89zo@fj%gr}mX3vQJ{ynO- zJBzo3b+^MLud`a7Dj(Z;WzM|`y<*6dy?j&iF@Zk;ZTHXk|8RI~BI>O3Tkg^Oz|4(v zns=~t8r@_toRMtpXA>OrqxTViyS!+=jbmwX?W&6WU{BtSJLY}OHM0pgEsz~#cC$)P&$R9rc!+Gm|iPz>Ma_R{$$KjVBfghW?2mM$bb!lDR+pjbCWY@2~d9OpY{~X)3 zhfPr)HZM9pU05l}_*+yqJzxD_{qpsBOYPhKW&hb9@ArI9MZWt-wwal0)z&`J_`14# z_UrC$PQQ5@bf@l~f9Lkpu)pu8|NZ2c_hX&A+Yf_#azAADF>Tv_xzzf2x?Whe4uv&V<6k6->%-g`N%*?(8x z%ePUQH$B{h)z8ZDf7DlAv3|aM&_}lU5A?+<%>6IziC+Ap+2xwa%0G^x{as)A1@Cz; z^;epivSHWU-UDhHPuWaD3b%GKo#E`2ee~^n_~YF8)IIaozI&gvHEhzm`>(gpJDGbi zuWJ3_`!~0{d?7e~vl6boTmSYe$HQvw?2hLCuT=SxDlh&zM zAzWYeyX1wGKmKQUbMi62*sWvRq?WfAu8MT2xw>Qf&TXIGC43HwSe#NWZL~l%WkPOw z7|W~qw?6g$XE?k#JZ@I>)4EI3R%!29`?_rV+pA@7?_aa2vWfrppMgF4hiC4?ExAd? zYww&BY>6szRkm6+iL<;=h-s&Cp?X91re#jgB9hBumrfDdHz)XIZt3%X@lStFnl;Hc zH~;nLKTTa4O9G(BG%-Glm;ERHqx;dZ=4CIX^gF-&WjbWN(Qaw_s!X9Qv0qEP?4m2f ztY?=AzB_Exez-xjG3dbKipdYwcbBeytA9kS&gkNn`I7F7V=|;#|JcctZ`!$Knrq?H zXw!QyQk)iN8lBjwJ2k;U%syM28+7C4d6`#YRp&M?yq_uMdT(iV>7p#-h%;(koLqSo zGb2{y2<<$mkZ#)j+adEq%nxS2{c;K)L)Uk`(>dDTzU4mO>)@Mv-z|E%%P7b!P>OHJ;njbJgSW&R zO!#?!w0^nEeAv)Evpm~&R*dJ$+wM6@ea4CkN1WeG*ws9Db^+r$|3~p}-ijZU@0gXh zdzJsgFPGM;Y@0gO#!@<4sM3g&ML96j(?jyAx6%<#N$CS>dxH-fWR=~MxwP=wI_2Hh zl(%&1=PAan)OpA-aY`BMCWr6qr_|ZhU6Qh~m$gw}==@RH_ruotZGVCjlUHS?M|E9# z^!HisykHa8!_pZO7fB{=oEJPVS@go~(#O*G{GxwtS^skV<7bJ|T6$rd0&ib5&yPRu zuY9X?f6F}0kNtfmwa!Q88Ex3}VO!^F*>t~)Z%T}Fi=S)lI<)z&bi0reC+8to$s{&E zBYx`#>u=`&6Zrj~f#*)Y@ZU8r_TQ}6|5kV2`c2vO38kO??t=D1-p`-jSDsQ?5@+?X z>5otIQMLE6hNnOB`STjdM$FZ@+1;tjI%C8AMg^DX_4Wx#e&&y#uHD~qf6MeE`6547 zADZy`V@2$j-h`L`QmS%W&zN)Q>%3R*=ljmLe|NSLU&wo*kDJraPPNTE=ACx^e#-T^ zZ?69lO?y}6S6C~&f9aF?r#*A;@4fWped~V4b$d8JN=JO`{kMGK>Dm>MfBPbA%oo(B zxEA>=axN^}SF^9?Pp(kS44%U>A75WOGWp2I6ALe1S*`iP@s;+`f2@C^KI-?@IIhT) zGoEzZ^&?B!vG@N3>)%M9mY!`Q^hPDtSNmwK#HnT-uGN!_*c{fU?4SEKCr_=Ro-e?|!CtM{#6?`+@J{qonkz(8HsHR1shxl`PQekQry+1OfiVBecs!_u4o8B)%-S1p^V zw%y>`IvvyXBIh5TH~;MyUA^4+-WEIm_uKO)v!D9=z~P`J)6VYR^3adrZ+3p%`slh% z<`VU0{jbgo?#H|eFRNNVS#**=*B!42zk_kfpH7uO4DdXmwDPXk((lnfZyx*4&>i;T zR&_*S%sI|n_2pBqto@-|wR-KlTGPUZ_a$R1>^(Qei)H>Q@6dg>_UWE0TkZ;{Zau=O zk*VOFnLgp@GdG*P_mV}`3*KmV{&@OZ=STYE6sfIew*NZUx<1R`)@|<OXl-cd_H{Z=+&uBv6<&rX7@kpexa+Y+w)O- z(@Txs{Gv_D2CAnvy=N&ov6AG2Sc7|iPI_Hg`>W>7e+GM9$1M+@Y}@pE{=-Fo>>l4X zQGU$1Ppx9%?yg(sWaShDPMiEF+c{-Ik@&MtrfqzI9S@qSzVie&39Rqq=4q%s zYPD0pd!OpQ59x>cC13waYQIvlR_xQSn~~+rItA9tWq$@9O%C!BD5_RDn<#F%V(s1i zC!X%({iFA>|L}Riio*dP`MbB+3r1J;{du)1`sF;i*?tvzJ^_ah?|t*&_$Jn$Ue62F z%j}|Fe!0th%&NX2^>XUR+`g5M&YViVp1&pkOX04t6#i_Hi|;rkLms)Rtb0_cw8#5f z*;O0YMSnu$`Qq=rzP>AO>BEp~l3U* z7x_=W?p<|zQI({VS5a>H{(C$3TK$UOb>)GL_HVO_#fRtf+XyeV@BPx*1_3FvR1&efVM#!EFYw`T-{%M-}v-SG3A7AT!Am4Vm!s&N-e%n9Q$?>xfPdC4( z{V`?Mq|A2;-ZR%FU(rvwt04AiVP`b^nTa9`7*|jJAb-SPFiW1zPN?F(|HrhYk7i5T zYF){ieCyS8;a$EWufNT=={a1gbK*4rRGTIj^%AdlWfzy+u9aQ)XUY0{Z`Idz-{n@_ zz4~UabbRfdy;gbsb^WB>vQ?pfq+hZ=ze4@t{yv=+gICw?>0W*tq@6x>TX*;R4R;;x z_K|hle;?}7Sg8R!b{$3^S@C>( z*VE9nPSw!V0$E4KDNp@v4>B~r((*d>(Qo>S>?q#5ebd+YE{OLL4G!AgX1SyxkF{$} z!$MU@h7Aho78XYtqNRL0B*e;o{C)msQ~YDg$=g55`LC~PyM6dp=p@$dmvtTl&d~{b zu^=Vki|lcEub-DaJ|6Ehk-zac^6KRHeS1HOShcNO=Daq3$E>HtYtCoNEScDJqP4(q zOD+QsPpEx|oZ|I;%qKr;yI=U@x+beYy;ira_t%a=#RIz3l#8pX8Y`ukYvEXKCNO=p*0ubuVtUx7Ba;xW1KBKeg+X_SDY1Difv! zDxF%gz}nG&3eT)>s~ApRw$WaeS)XYb<@D*!p7SP`T$XvVOfufIIeF@70me7%QQP?@ zCgw{m2@CtEw8yOuonx!H@tU2Z+gc;epu|D=?C#P6YfJ{w}U^1 zP0x6@zmoY>c)@qc6W>K2e2j|yBlnU0h_t;}{MLW_*ZD4xjgHuGJ^g~t(JNAjbx9!v;0uY{;0fXD}U3q zw-0Oc9*6ws&V0=4?Dyf=e7V(T%kx*U^{|hJ-W;lQNC;EnRLQ&5{5$-Z zoz)Km8~#T-(ss>1Wc4fd)jq?^Pp>>n|Lxjz$K6EfkQ{Hy7EgazV)BLN7>dlm!h_<&R>(Ex!_K$-9nqQ z<;xRKEu3<~Tll%<=i)u@P9`4>x?-RHpP~KNx~+Qa%OCDYzmmG^^7?rXmrqyAmg#ms z_9xB2<4fZA?crzc3zXcjW6OO1p!&UB_AcK2?KZsZe-wRFSDa2L&5iLaP~{f!5qsru zI!o+o^NL6d=Q&sJi&y-PZ9g{a`nT;}rn$1inr}i&t2)uyZ7y!iMON|=}g_T zdy3xo)hcV&BnkEy<+80aoAOrty8C~Ie3P|z?|s_#>ic~s)px5sORoQCVC*jcv~}vY zOSe_3{hA;8xBXN9ad%x-Sa8LPir>y2vp(j1_07A++7y{$*zstZ>-Jf9=SdoDTxUh~hs&-b7FOZ!kf%OAxD)w4TIx}{Gnn8m(Ts`l9G^5SG3 zJ;#iPTc?G!iCD<%WGt2`d9*U)Kf~tZ4O?v4!!Nx3(miuaY|xF!^ULmMZ7(mocz`cV zLwQ>t-_rSm0zGzV?CQsbk%W!trPGgP;jZx37| z5dCwW-_wX=$5V8cGZ=(2RGLlwXnn*p@uRWdhwmLva;0RYUG`_MRX=84r;*L^Z2u(f zpO5$K=!~-zyBXP&5t*zZ>7mjUSaa5WwfD!}n?G%R|M}&Lr|<5?F3q2n8C!XG{q%Y5 zm*)xp$o_C8=5?LqrMG$d`CEFUPKREZ_3PKSXRqI`*mycyWxDZhPqV8LAGkL)Fiq^! zUU0pR=ZE!?ec~Ve+e>vNVqW}H-<5qg>du`r%N`{uOCMg{*7NU~O^$Zcf*zS!pXT38 z^JVkzm@nzxX5(D^sFt_X)-rc1_gpVuj}5=#Req9VV> z&x(Jt@99lpv1_ZBzpVdUUv2aGm_5TD@y81vu74|S>vwgJx$way`?}Y~zs%3*>FQZ^ zZSyg|K+#I|p1$WgAMM}#e$X%ZvTU2w)wad;JW|Ckb+uoYAF7`1_WQz}GipLH3y*os zoIlAwDEs1yC4XPvU;p*j{yo`uw{F?^?AP}`lVuQyl3CY?A=QHooejqP=|3|aC)rYJ4 z=klKH|ERaLTIPv@!+DYKd_4z4Lxgq)Fi5QX)#dHm_Vv!gFZxY!>>utmKk60t|L}H8 zRe0xnJ^ioMo{>+zcP?k}@+h-*WJnC>+S{;}pQXm)Vx638soaEHOXg?ZerC6N&Alu? zrsr9!V@@l&N?L>}6kpoar4g0;WS@8Hrx*OOf9pO#r&r+9Eb2$&L@(9kK79R_KW0;Z z;HCA+pPp}@FhL?+p@~Dwk)e%6z(JvjalP3`^F!xbtFAmcxpa>jpT>>2X8a# zEnFB`V-bC$G-9jt+-$LObEOA9lRZl={rfvBmB**=_KBa{OV;vt*XjN6xy9D9Utmw{ z!?W6+yLUB*URrw9^u`QtJr<>NwW;o{!q&m^y2Qn_Fw{bBimeTrB1 zY4EpN{aw1gL;7&p^?+#e+c|pqN3MP3uI{)wr?N0Q$zt2dtqm-pUNPIIFaGj9Z_oQh z+d6ONmL|HH*X(__Uu^x$`=#aCU+evT?0?YTuutp$x}B>t->c7AM+=)3>eywBwM-b-)x-F&p3FUGtjy>oWVN6z*)t$J%z@7U~^(6DB;$o!A* z_2Pt{T${6J>r~bIA9qDnzOMThwmb53ad2^M@{jsy)7~!o%fIy9KZzQHOM4ubz5E^! z8I>J1<$Y;#s@-xW|5BCk{kQZkx}}_cvm@WMsfoeDUj1~ml>EW{TtDiMzJFAmo&EFc z>ruXOK-1Eh1Kdx>lV|Xg)Z~4JE@*|I&;RoI~{|r95Ez0-n zx-Rx>+RoC_eDzyj-g>?>7MUb!S)C!lWal(r@9x>%7Q0EHMHMbG!Vza0Mj zbp5BxU+iA53-imV)_piU~Z$3Rv{nDSv(p5J-?$uqG9C`ZpUaJSsl_WOp z4&9Z@v~T8q&m|joM3-?ET-?yxpLU%6^HTdVnQwb9|9rjuURS)l+O_)L?Z0O}D2wC# zA^D)bb^E{c&b&Xi52jyz5_YAuU+Pua#^d!em)6X4y?XRQ#?7V1DMuc7lnd`(TPnx& zA^ecMY>m=28^Z^;*J>ZR$97d^9zm#Ubia%o-e!p$50-Tt`tR#etS&HGZPHpNz!A9=@a)XANa zGV7^*X6K{!N5>EImIr^FQ^)jSPE_6+_D;3!+e)_YEzC&>xE{F2aL>dw2gIC?x^(PF zKU;Bc=i~W82dgt5uNVF?jh`udyFKq)l?!X1UeZ1FTKuHxY0p%daKW8&dAT#!KD=$r zseG7Ws^IkS`_Gq~KYF`7+IGg9weP-#ykAQ>+{QVWjX@h3wUhexGS+T?5B*3fNMna!(GlFBW+UuF4kzf?;I<1-lNmY znZvkplUKowq}xdWX1xogC-3f`SEt@v{7`S}@`|tPlrJp4+H&q%alA{@y~?$ZXT40C z7F{NIUO8$>rbtu48-rysljD7(D;~SeJ`!dpTYZpkfBACRAX~FkZS$K~r%IkU#wK~y z<9u+~)U|(3*GF6b`n~=7wq25UO`lwwH$CjjwOi$v<92USIw`GPdyPe|@MtCf!TY>_ z6q=9xTKb=%Ve4&up&w1>;i0uy6&-Nw#kpC$B@cgm$efp2`xhlF3eRN&;Qrt?dcVTAvw*L&b#KYd2_}X5% z_Tu*A78AoY9;Y~1=RB@zG`O~OdiqcM^VyZ#H>I9_u>0M9`i3z(fY7%?!*0p zFAby4r!LE$zWLRxiS2JE`>2|G zKJQtB+VUe}`}Ag2w@x$IVf-pS(^y#{l-a0habrVP`&+?B9d&ATXZMRd_~3iCjqtQSU21##UJU1==!)gD@(urN%Xrw&bAXj=X5i79 z-w*rQyg#}hj2ALp&FgnPcBgN7z@*nl{>e(0yv@3n&7E(n^VVZ})rK~U$gqOnkISMP zJVmcnG(Qr(eq{em=|>{l%>!RW?e2f`T6?ziigmfEd#5Itt<2Ubbe}NUS1O_Dq>|P2 z+}(Yv<16zsw!e6D>GzlL*O&K1X3RT#vgT{}-}vXZ@7vUW`10d>;D?s;tIjXKm0@+J z>d3Zl51K139#uLnY>>E>dDl(}>urpE#j&l4ulMO)s*xA#+x2X*XwR&b35yj8dC+{ZGz*?>ffnn|t5>i2b*nOJh-Et4N>&t4N@O%=RPm+h*|} zUC$j;QG8_M{qu`sp7`e{u8NJ2oU$@#lI@;}n-}spx~ck0vNP@3fBZiK_dc#4S+5Tj z{@8yse4fmeoZP3EzeIQcXXuEU+WKVTNrg=1$lh1pW)TWMXLfL{GGJi7m;HSHt^JSY zH`y4b^Ea7-R!ORN+QcW?c4gVA?P$7p@7RGqo6o)1jQw3+Gog8Uk(EYSME=y(y3xPy z&VPA*(SD~#JGWQqAC7G$nbY^#eOMz21d`ChNaSaZJDyy~0xJeC-2QN7#! zEGfB+VsVA19tNL(Vh>kKJ50rkj8VJ>5qF(N|35U$TN+b-zPC1XP+2v zSaJMF)cyuLn?J7mx{rLgRk7+n!x1Zmi#DpJt$Q|AG#y;Dwe#ENg)=9q{q|&IbM2gZ z_dDC#S${t@)s}zut+QHw^+nSBcRS~OyM6Nhhr4CAuj09C6gK|2&hzPhra+C`&ax|t z7nfYSnK~(6<=A!4XPk9S@W(vqezC0mf_Xvj59%l9rA_yo zUf3U?m;BpLx%u>MjmamunrwG(%GQ36w)v5SP4WZjn|~4?^}5`fE4ohit8US&SNHt4 zzPvU0PMW5QR?)W$1}rtLcCou>mj!%Zc`R05|K6qScYD|Wv|d_#sW^7>&!uPP-_PA` z`*!8m?M?sqencOzI(lsFlH2hoSNgKwth>4@>-((kqik>Qg}6P7v{~^z;Z1+mBz76D zx7knsGu(_n=3V}HZt~$z6;o?!>tRaa*V!!vj!wO` z_Hu^km$ScoReKNJ-r(KV+mcglW??_qa=TPk^o#!t=G)I-$c$Uueec)b)AbXd&Wa=D zM!2)p2mdp$yxYgTwDsH0^K$*63Y%dbK2$FP?MX^cAuH84mdu<>ttJy7_14!(FqpB|2xH`T1?zO;Nwg zJ>gSdiqBe8^it09TIpQzL-R!Te>}FmK6U*J-ecW~Dz|D14;R_I*Rb)`id}T(kZJk! zisLgFYgT=3uC6_IXJzg8vR6B+?&$r!ski-SrrN*P+WX&UZtdTgTPOCzI_3jk>q7pm z*>y6TcWk@zfP7Vy8>;5YL`CCR1G-j!SiMb!w&7E#iU&g7g4|Yeb<-uZS|Ya zx2>;nwa(ZT`fq(<{!*Q5pO_SPUs)*F zwsk+8xZ($Aj%4WzpG`k{rK+29N*?cY3D-V%F#&W66-%}>p^`X24FEk0bw_+$IJ8SB>Nn(hrhe5umh?3&c#)wkcL7PDR{IAN|V zF|j9@X~_a>KkX;^-H)F9=sx}{I8Lwkk-xxyh6L}2@A>cFs@&DKKukMzyV$2`ca~jy zsu$UOzU?t&-3)VnSR{$Ty+b?%k*du5+%%jQJgT>HeWm~(Gr+^wriwjJ4YTJ^H$ zwhvERN3$Q6>-+<YD>-lQEsJyUUp8px*EB9J_4hh^lJNwIDo#Op0 zd-Ol7cFm2;Z`=MWZ`H@2Rkd%nMcsbYo3%0T(_3eoO%lq!=eLR8T0OZ{v^@Mr`U8HB z8tvJKzhBzlwns8m`JU~8>v=i6n@x^|)o;@|ty4Mi+2z)}6|Q?%RR+ztvvY<oFX>su&>iqDrfKe&(NIxW!KlR{PJGQ zV%yzyn)ds{_W8SRI{jhke}+T%Tk3h0u5Q^ckWuB{^ghLW{_X3#E}dq5GwD-W_01*8 z#%6ZEt%XF28-v$1et7QsA*}Vo^S4pC%R1|`w%u}@@@r<~Iwc>Y+2ux+>%yNVyg1r; zkVQu7=lMhNLi^t^20#=zaNnE&i3)n|`m>Tl%1%wW8?z0V$KucO#DHuJ$iq za`U9lWYxDn+bSiOPjh&y(qM9_=^eL)?q|Wcsqy9&vyTS<`2A4)u=S4TlRn4n{Kecy1$EXkxkl}TdT+8$25W^z}OrEzk+$Hnx__$9OcD82u2`-%4I)n$KE zmp}ZzO(YO>_#*255jItkkYc}SucX{=EqQElmZ5U$^aHaEKSYQBD6e1TU7Wk({E9pE z8PP`+CyDLcJL{!n$TaCinc*yRgk~N%$CmMT{yH1q%*SFMpLgzmsCwm(`(m|*>tRwA zmoHtq=+nbf_;XU-hm)E`Pd9ii;9@vhb^hDj?SH=hXE^TvBk=F?-2V)=^&c<)6Jb2? zgZW|E<^?}?y*_HE{n4vpb!@Jct=g^VT9b&Qyqk7T+|=Z0?(r^Ua|-`NhtTyNtM=b2 zyZ`z6e+F6mhwca?16WsStw(TCY{=?bjbvec=I#3P@qbGGGqf^9En4-T0l}9?80{do zVo=y*k?!59E1efmI45Zdc%laXM9hhrkROvXKP*0^yFT`1Ro@ogr)yixqyk#*_?sSi z$>O!IWa2TSPkP=~4MxS0b;3U$KYTCnVxIC9)71F~^&jqDzwve0Jhe&OWtZ<9o3wJ0 z%hZQjCv|dUQ&O#WJy+WNpCMDf{a62y_if*P@y*V+J=4GLrMPi-^yPG|=%Xtfb>2@rKKwrhodgqZRkN?9<~#Urd|TH}T==*r4~`muuqFAKkWfvvs=mIOM9p?L9%a z_iX={Fs-_h$;$S`trHvdKAha)6ZhxtoBf}B>m{phT@SmuZ`#}XzuNwLRi9SBzI1&!{jwOU34!{@r5!*;(~W*(RUW+D9b=RT?=@hUztJkdVG^>ss^cKF8OW`xt(#_up{Y z^xH?imFwKs|7S?AnDyFMy!f>2(LLuHZsu<7IQc||p>XGEok4ZQ(hKy$9DChPv*CHT#a)M_7qw3zH*vZ;Jf3vYx(|v-X(kf zKF$95I+scA^YvfX>vzYWdhY#5zVDac{53Z-d}GO_(goc6^U>q0GerQ5H5^_Xo+ayn=iDIq^& zapSAdh8Nk-y!o5=3)ZkdaA`Vz*lOGI6|e6xTr%OF`r+SB-qv^Bt0Io;9XFQl*vy&P z_E@B+zU*-F>gyYRC_h}bdHJ8j_9L_2&MEH|Z~XFBaBHM$WSsYsdF4C({66s|Cmmt9 zDKm-B^r=^#`9<^f)Aw%w$29%#-qPABW5wpF!5f%uXXij{VYBUH#~*>(}Mwu79xk z%Hql|+q&*PlgrWG*_b=K;CA8lc~hU*seIr+sxR~>{KMb7_vWswPk9+mB|=KlGY|0+vk zQ3Dq-_`t{O-O>l;#P?pC?snzte}+vfk`tGxG~V6SMI zxzFD+E?M8Qhkwn>S!}!4->W!vHQ!_|zklDIxIJ!+)|+Rhc?2j4?wDCPiRqn68t49u zw|uW%J{*1C{>8TX=&b1-3+Gk)3VUsD%~$SY<1Dyb`RS-kTu6^{?j`;?56h)K=c*sL zr(w@%C%*Q4Q%&~5AD<6iyYOSjwdh4`c~E7-uib>8~5tlM|XF{eh+e$S!9&bb8_y3wm4CO;>J*-wA=^Sd15 zdcz;vKgd5STmMnt@1uG99?u0?Vw-;~_q`&={g%yb=jB}8N2Q^k_uZE|DYW_2zo;YY zqRK2E&Gx?SW}iM&?a#XDPu|zAnYUo}H{Y*MbE|IN{urIVZPvWWtN)bj{P3UQ2(S2| zU(*lI7mGZ;=2h;}8`*BYM&6b;lP3l1UD=y?&0bGEXNyCFCBvL>*9A4HAK1H1Gfegh zAGedfR%&^AO;}Xj`h1&9A_3<_1y5RsG!=u&-tzCb&rJGxv+wJ>@7q?_ zrWWQLS#?(0&EplL#G>+mXMOAL_w6sZy7Nxl^?!T)gqf#SMTTergs-L{(R za6v!Id**izXSYA=-C#T?SwW`p=|1Tn(U10v)ScbO^}#2qcYmvCJfCN7V%@A`6Lp`i z{S~pXN3%1%ah7D1#!{Y5iPN7dv?;Bt5&ZD{$o!UimKu*)$0jE}nlE~Mb=I%Gxr<&d zG8q_$06d~iW_oc8rn4fWmaUKf%Zmqk~YaV|?X*{Bd|bj)yfu{dLB zZ&B3qx3Ld>&HcArDLJbh`_XL1FP=K}J$o+cKGIyCxh_KIGiPdV|F50(qJW9p?+ zkLTMzUzKb*<|$f#HP_ZQ@AxB;^B?Wxnf6V)bk}vkvW>MXqj&ta-EDF&q~NWxWzl4# z`5wKQsU_21y}s^tJ1%Vh)0LC*r+vD4^HPA{R>6 zvIl&4GC}pwYeA=`(wVV~C+X{jTYETvKQwPnczK-chdmXW?EC*S2!6E&r|$VU(f9)z6-C+dbh`XUG}WG%eLyReYnyjGFN|{?wbYT8)HoW?vlS7 z!?c9s&-r(jtku`2{HXT&(fnce1F^*`U(OT#&(PBSXx{d6z4f(A-OKY9S?^w1bYRb< zgjsud}1;dM?^;wQpN=@!9Sj z7i}CfYf}r$*;X#`&u7?m>F6qV)gzPZexFWtnI-=B@`(r!Gq&fl?MK&(X8z;-P*bOL zWlM+YTw}kzTjxDEyWOmCW~lG2w&2e$(r4ZjEN7AWEX)4jRK@-We{SV1eVCiHlJ}%-g@|F%R9qG`PL`TO}lnCT>DeLulUq|nWg(K>?$k& z>Kz+@`pNxi{~4UK=Luf?W$%CJhoD0952$Ga_ za+7i?SYT#R#+GvH_OJg8?YEz0vdmel|N7Vc$26)KOOH|v!8h$pWoD{ zd)I&dx3|{RePyP+_}f3uk9Xha&yWek1B=-cI9q8rS`04^O;RMWi$qbA6oi z{=^9$7uS1&lka=~+OqDiaM|tUXJ6{7nr!pj`aJt))t2e^w>!V==c{r5FukWV*4^iV zP2i%}v!>0ut-t2WT_@?zwb2$wR`htuG}$Fsie3~F5%ObrGC7X*w{3;a6Y8!o|*bt+}`* zrEh^?mwG8*#;dnKUVrC}o_q9d{p%~AdgUMcwJ!OiyYXZ1{xw>SKBszvL1J1R=TiGE%evJ%lTEj`*Em-`vfdk3D_Wz@14Y% z&2GP!3ouM-{%-AI9^&lH%`>J~B_>x9Qp~RWj4QZGT;w ztUhUa*x9l$wy+Q*eq)UnPXvm)KJVtgdHiTq+Wh24wegNu|Ad!UKGO9MzPiWy@8*2D zRoAwcO^jYCuBjrx*&OZiX5%geA*+Ro4q2Bzg{i z@BGZY`ns*UG%Fyk@*k7Sm4Z=RcWOCbVCulGE}*(Z5LUE8%nNF3xBCsxS7( z`J;G;t_}Bp2F^3H?zcX=_+{tOw|m$fuOv-+xOk6UNk>T1Lyu_!pO#NMpnpDCJyTm( z?>_@;{jP7}H{Vvjd9+@<{MP*+qU*oM-Y&iQ=CAWH`z?9x5B7^@S=k=zTzBtQrhcm2 z#V2(KH}5R@8F+Gwfo|8m5AH|*Dj%Pxc;!pCw)gK$og@9CM{l_) z?dh|;ch6Jl)|&%58+RqSx>=lHk-2_eUg#hDAIEDp`72|4rGq~5ZM4c*y||+3c6i%; zk1b}qdCsnk+_EaKV~VfRWS%GN0c~cI>rOw)KPa2*|D!eakv#iL`CG+V=Dy{1XGNP1 z=!He}@Rka%V(%2VQ}2$mtD3| zT$1rm@mlF^Z`JPbYc>;W%XiLPCa88|`Xr}SO$Fl2I>#kdX3yHUHgiFy`L1~jt!~|j zbog#Ay#H>WNzj?&HyC`2qr{ImuK(ln(Z6NkW%Yakpu&yQ=Rg>zg;Oap`3l-S+;^ z5Z7+|;qc?W=^w3wXZ!WLT>E3Wf}6uMJ3F_CV{!Fq$!?B!%AcN0$W<)}{dQtW?}?qF z*M96jQuHI(X?9?<%auQ_Rz^yhw`>ac9$7j=p%v-0Q_FEj=Y_e7*&_eK0?(8db-kh*h&RXfJ7ymkJ`n@&p zm;GA0>C}9a+0Q=xnOtAI>u1#a^11SFbr<PKuhJm^tn&p9=tobt8DUb--__)e}~PcMeRPmVoTwj-M5d;zge{RcI29HtxIh8 zzfGy==GD)1LW_{of+f}drv3*$k@<;8X>e*-YqxSB){-rlMWW)BWTd(ffJdL}1 z#q(`@3>LqeYPNEg?n=qfTTE$x_rH0(#`{A>Z0wE7III4ED_>W~+Fgq-R-Jq=ZqHkp za35yQAoZI^R&($8d3dSr+&m30#i^(g=3nKXA#+P>CBx2(Ts zKh5~Jw)Jb={NDR7ra!-;v8cad5#$ zrVJ^yF)7XJ~XL0W7C;aW|Ot7CC?@|1+d^^Go}$OAg!+Yxh2K+eg{H z$tz#HtWV8evBaO}y^78QZ;=;|OOg}$cOHJ1r)D1h?A3n;{^Bpsf4-YHEk8PX(VyD) z*QY<->|e1@^-si=()+DJ6^jr3TDJMY+i;ibZ%yucd+gk{xzeS#GGAodcg9Jn$EO=4 zTsUDQ>rteAwKQM;~9bD+0^#PQhA#CZ-uGf6Bqj4UUHhT_i#_t zNB=|hqCX}Fe7Lt~p8jPWoA6~Xx8L%8`ov16H?x@aV&|OW%&}sfR~K}w%uJE5)+#da zdu8|RKf`+cCrh@y`SnU*bKUnZ>wn(<6(xJWaZhaeN9Kc7{GDH3EN?p;xv@Q9d*92@ z)QG1JuVge8%p}rTdcK?Pj5&UopLKq_JntXfht5@{$_rnX-O9hs6Bj=Jsmsd!X?}O! zo--<(w`^C{wdGo~pWON-DE(<&?x8M?6&SlNa3weGSwFf$%~i>KKxxTpx?Im zqv#u{-X))-41TN6S1DZN(`R&pVe=fG=Y^~5h3#}I*86=lYk&Bkfj6e2+A>J?j>xZ25uYA3I^5OJ*${@V}We%JZ=9E!pA1pomu?Q{Exbo&>dOh->h5r z|8PsYRq~%<^G|En{kK0qe>~s(%#zR(%V|?8r)t}G-S7YU^ZhQ9Ki-e`%U9?>THadn`=^cRs*3U>yVl&Q-8Vfs zyHIX3 z-*z+e@zgC%PvSW)sHJA3!rPan^{=~-MVTCYE;`q%B+@3~o%^x{R<-n@PGb-v4&xK3N?^wJN@ zAN}2b^tSNIm-A$r>iFLEP-t{KUed0IswwBwCw^zbTiwmYryL4Xj zNTFhxT?5aoPxch;LDiW1)AoGV{l0B~+18$Wzx6)@^9dXI$MzBx)4x?-{*%eKJUw^K zt8K@ld*7Q>Zk2eOtD3A+uy*Zgcbzj6HfeC(p4n*gTy*7cn?F8R|7riRpB{NFXWFet zvTm=AS{0&k{Foij57#9X?k*qYW-fVU zr93-Y*6Fo>t&^mVa$0oBG<_qn`%fzqx9vErc~Id}MfgMcP8-z+{NfRjKkn%F?s2ay zTzUGDhG1yu3fvl@74X*f7Qzu2d7;LzW3|f z>-uANzudaDeErArz>o8K7w%jBFgO0tFWI@Rd%joo-tT;?zN1G;BbEE9uh_wc-*dZj z4;-48ee~=8Tj57P-mB1i_p4U@;l9Sz6K=D({0E|>Ol`8r(>omw`-XSM9zog97K zZ|;Q7&U@h07+B-+Bl7WohE6+)*@w&HY%gy4D;#>6Z}vLX`EBv%=U1f8e4A2zOzii? zZH-Fjx35sS;`Xupe*DFMyjz!U*>v<u2#0UoL;OUt0b9yER>!A5Vu~vNMSPQG7&9 zJ9Dv--!;zsWq0;oSvAjpx{BY@X)ipVd3ia_UMN`eeAc%GMT}p+u2=a1I*DSxfJwBq zt)8#7%RKSFU#?pv?2c|w$Pm24shn)$*zBs9ym5+*)XOysKDOA4C~p5O{7=Bzmo?zD z-SHi}uSDgfE4*V=^4aFVlO}QQa#z@<{!@Ryy#&cJC!o^`=&SYMlCY)w6AKP{KNWT z^RYVdA3Go27k{-+|H9W;^CR9&-b-)(D9esqD4?w(ot}M@@zBouGyWx?xOr*PoptS= zAE!U!na^z}@uT^uK;*i`rHKq?*ZO{oW!{};F?B{dSL}6_63O*E`M2VaxgVTo)^)6& zzoP$0mF%%e-)wsJ7l6t?_or<%Bfee4z%9i3?L%tcz|u2{UW|(0{B-xnh1?yi8`f zoc(p)IsU6pK796@CG6V4mV$1_(ml`5Or5#sgSUsr0miJ-Kl>lA1N0v%!-;jzde7=f~JgZKg)unP8NK+xqF)8^10jG zw}|-|to!|F|B;_x);G^O-e+T4Xym%kD9gC>%}udS*Tlnj^x9mJp8DvXSjdAp0!Pv& zAF5}3c)7;*k;fnBRj%tRX2)MqznWLNLr3WPo9+I2(@v`{NnK))(`CX{zBxrG;@Qq0 z`_rxe+CO-<^?`qnofvNo1#Y1@q~xPx{nVrXTh+-Y$IuAl$24%Cuh6A5%> z0AbMw{~5N3AF~(wWA<@+`%f63)-Id4Sd4;schkN^O`87Vg;+?%(bFI+RuhJ23&jcPxP2zDepZ4^M zP+78s>DfQ7KfE9HcUGM#i<;k)llVdEl~`4G#wVAXyZ#Q!I{PtDNAipQ54ZbA-acHo zWQ}os)~e398}T#uoYy~Xo^kPKT!dm>?(=uoy-BN7Uta!Jv_sCsd-9{weAeTS+&|{j z$^Sd|_s5k;Ps?4O3lcv^E))D2z5PFf-2G`QE405w|7U2i`_FLLT=~dp;6`_JXw_di^W`DeM~ZF+w6snhSC z`u~&N_xhp#(cksm+thA(KY(|!kR{w=eiBH!WPiLNIzUD_VK zt@&B=^T#cfp9Yd`mXEJ5?Nsl4Jkfda%CO)I%~t}gKUL2c-7lN_i#K}i{p^424Y&WK zehjf^vf+KyGVS2j{|qu&WfLRMuRJ{4R!{n++VTjXSSP#Z-_R` znD*@6Bk#&Rn<_Qp)U@XnxMlbMU0P@HYCormcFBjmQ>QIozuCF8X!^rj+}`qA+<7ML za0}i!E7V0&Qg!x3X|5MF<{uV^e6T<4D}3mm)W_(T*RM@KSzWvOPK0~G_1o`!k3O1l zl51XJrP~8f-v>R9HrQF#owk$wag%LrvF^Xk3$t(am~FfCN~&(_3TN)4EccRhrfs-$ zTx7;w1=i_W@dcJl?H|>j|BXKN*L+i*k9XR>_xsn+-50)n=BYoCe;4m@e-PgtZYQ;Q z_Tc~ymCNG(AKopPWViN_>(1#Z=hIFbtw|ExZP^vS!#gL4x8xt^kH@QH|F~bbF?}@G zCc-A~e$tk$m-Y5tYJMx^GSl8-XYb;WDR^4*FJfRpYw!G7PpFBa^~dUy?^ocr>{R%ON4d@ZBNY) z-G6`Eed~EUBjRj6rpPgUu)*#Ol|#Go z`5jF(GnKrQc5a&Ct^B@XmdF08rOtWwN{{C!eypien(Z{xMz?TT)N7p+O7kO{Jl{T! zJzQ*3xXXC%&I5cqEuMt4epvlr-|7eJh5j?-|CoH#^U>b0+iuN=f7NVwxj#wZ=CP+I zCsb~#oNS$=_c&>8_g3*aw?670J};MjpZ&d!zW9-hw>9A<@76z>|0cn{;If!uhte$D z<)@T%Un?Z`K4?2;w)%AH$-h}|e?`!zp&s$KTJ^4p)n|K#US>&)0$&-17B`u&IR zJMv1sZH+@e`cB(#-&QjHQkX~1*4<0F1RuMnXIzXpq5AWF`psw79Z}Cr_-{oY{rtoD zVR@%%t-1G>%YU2`Q|%TP%T7Ihb6)1Mn?~;DGlH(4>i@pLC+75gUAwGzTh8CScRlRG zw8q6h&V9P|D4%b4uT(wrwxthSHg_%-oN~nc-J8N4LMaD%zZXx+{1N{x`jU_P1Gi3)3r8E_j-l7uC)( zyRF5koHENJep`r{*Yv1#6e2sZ|q)O={j@XyuJ3)`!5}I6$z4np6^<6%@ zIH#jq=>(U~P~F6Ks15_TJbOs3T;p|Bhw%$9?O(j zErBdBrI|qli^Oo&ea`|nt!qP2T&Oe)T5)!|?z5Dg{ zv0952Y9IbhFZi%$SMHMad^O7|L}q*{Dtl6R-X?Tg=eBmeeJa82#23@L<43MfuR6NP>h|GeyQ_{{b{k9795Qb^wBh;FR7sh(=b{(% z=4LHm-PXb&!{cVbQ}pOA?siN`rYdCdw&24&P1+~_@P14_E*0KUr}FBb;$RJ`u%hkQ=g?RKzr3rt3ogHX5-FF~#Zu;RLUsGbm+|2h^AB6ucjmt7+st&HZ+4~F za%PjY5?vL3PbN%Nw)VHMWZz+W()YvThtH38obOt@XKrd~aCWiS&PA^-t9{N>jYw&# zu5>%JD06yAjyZe6kv^^DieuiZr~VaP{A0WLUo*?dEx+}TeflGM{maCszQ04VtIQ8Z z_jg3io1Iyn{qS$yWA|%sl{fCV{4PH^(q_fetxZo4A6887>=AlragRBLso{*VJQEKb${OlxZm0GR=*X=h!#asmhUF@6r>Fc5{3vuru)^fSW&B06`1aOn*DibZE>Ld8fup6`oO6CoDmq-9ckG!~U6M~d-zvHU3yISS`?T+|&{;11ySExcgR>@ZXIfj9`Tu87DbK#T zFXfkRXzQxHyVCJL%{qTBUHtC*=g%5;>NU|H!dsqP)LV8Tug3kseAe3)?MI4V8=fpw zTdF*}C;F&LklV%$(cyP3BN@{qd{VBhTl?FsVpU}6e+HqKX0vSKxIVmFzBF_CuF|)A z)~<~GyU99IE@D}vlk3y9Zz|k+zfZ}ZX{3GWkN=0o58ofVxco=kuY0y-oNta8-g`M~ z$w$ZfjI(=OHMdUk0)149Y3(1CHtO!_mS(njO|uTiav8%_U)0d%_rGx zI@Pv%vKZS`NXxmT#jb8LFwERnm-+qo*8Jzord_&y@7JVj)7tYd*50~%|J=2Wr+$bZ zz7^K^JGjQX*4StB<+%S0hi9oiy&V<3#O+bp&MSw#)h?LtJaAfOa^u-**J7#philnG zKW=UBPHTE6TX#x+v+C7(3k!2L9pN_Ut9U#ysD$T@)2VeL8x@$cm#>WHioZV5Kk`)U z+n*}`85GbqhQl_9W22eR{Ab9pmghhG;d$ss@s4$0e`otLUCX}e%X~!qm(S!K@zad1 zO?FAqxg8q1`E=pQ^%_4qAIul1;Qto&p=MWmcTJ#t*B8w#+ji}4kK28}(B}KKM-Q|`t%=1SJzlywlPoJFZA`SxbuZu|L$eT$6O6M#F>{k!C&ZIy2BHb z#BV~5w@)bT?C;t1N9ITOLtBQ5_s3?HZf`GDkDjX+QIn`QIe*2R)ym4Z?;O!%P?W4p z;Nv*>Y)Z-Q*vCKW*8Q&3H{4nszxhwS>#xl>UO&&jCG7E0y>(Ce!}Ss`c1?XYeW`4+ zZ+qC)Je%Me-L;9;GahDydvaX+%<0TxB(!F&+WUuRt3LlR`SAArAEjV_)~kQ^Nldtt zX>J}h^~|SZ&%|1GpZFv;XU3DnncmMWUpL42cm5Il&>Zw*d(y0Lt25Pm*Qs>}ZTs-` zedls+n~jW5&+ST_f2u6Dq5V@QYw?_(%TXcSZ$KAcRlWRW=6y}*bNuy}>mM$BvQOkk z_apWnJ8K{97rOa6=!Ng&?)B_9&a-zuZ@YJM>O2m{OA}p;U5+qa_IT#-e!^}MshuBY zf1C89_tAXOiXhoKeeQh|*T~wR>G$2Rv+W`4)45MRKD_tv`uE%v$5V43#hK;&+$$t& z{UC4Q$DNPck4Ui|KT@(Md-;biaVokBdm_BHJogp;Sy#~^b1LaGQ_s6~yIp^XKd9&T zU*@~~#r*WizQ6TvM!zlIKK)z9vM&BT&#i-=C7vtZudc!*VzON1`{j3=p8ecjUT^n3 z_k85JyZQO*YxjA*j6R*$uKYp&(e=)n>_@!L9v|mSv(oQS-ez9^q5D5W!o7$)Hy2D# zRC<*er~Tqsh-5-&%)|*stktT2-e)HlniS5Av$%PcJxn2m>q(9Hhw?-B7_aG>=BnNL z=j)YWclz7f;P5?H;xx?jJ}ojgI{DOnH+xET$OMZA;n^pec$xBg0-y!ziMIjO6A+#gB*G168qPO6UGUC(^V`#PAEtAiT2rpM z#(O{WAN6Z>T(K{s_N`a=QNHZe=R39Qo`=o$>N=INUUr-68DDOWMf0`V3+0s_&StnC zy12&pgL~&bk=Tl&Ywvy^wd4B8y4%{n^pW-5fV+-beaU+!l}}HUP)=zmo_OWzy>~@j z7vJnE$^NwI&HBAxd}|Ez)8761>i>E7=WW-nYk%w9aB=@b;r`%jrsqX6FGauKetFA( z2JUQeF>$NbH#-E5EMg0r=6m|nYN2MGh+LNb`TLY#+Y7zTU2pO?Kk{08hmG`yU&q6C z?!EA}Fp#Q^6*(b$wXUPlO1m-{F+;ZPa zSL{>1M9h!cgDIg?+*QI2rdvBTPx4^D^K5zF9_0^C-{d*eySLso|EqRZ=Syd)zv!7s z*Z68Rti}x$pB$_Qst*^Xb{VuQ`u16`E3=*(bf0ie6IAx_p~w@t&W@ zYHHjec5C;3#S%&wEL}>*Qnow~{VqzocGFt8(!Ql5m`Tc2aQLVX@mn zjaza#?##WDb*s!XvgFI=-(N3(Ow-cS4lloc_HNzu`lc79(>HJ3I_I;Jq#Psxr;C1) zfAjFMz1aE3{%zs6mVVn=D&AMZz1KUyYT5d4r=wT8m`!J@jxgV_*<)U1?w_q8JWP`~ z4~O3-K4) zVUyI#BmNxa{WLA$Y1=y^wu-X_^Le-2e(Ub}@&4htL6?t6)pu_BUG3}f{^PfWRe3?B zlTT`{m>T(ZyX~HCCAY@#NsW!`<2L;WeHhKleq@$XCa2!yN7k!n-wnI;EK=QjW}f|Z zA=$nQ_wUANykRZ0ia&I}Wn)G1(Xz6r&Pk`Zw@STRwohx%q;Hq1Q&@wt9~Y#z9BE19 z=@N`yz+yIl2`cjw9G{bvw-_@jR9&HWwvi?7yLKjMBJ zerfI2+pl+;EXmGL+g4es*6VxeNYCZmU4};<6-=@)Y>b=y(fN@}o&DdLHi?gJM_fA` z^5LDSw?_Z0?PA}}?!}&w%kmb<>3f_O!n9@*@48n9<{dr|TqFJA{NZQKhpYZGwCCAq zu8%vNuli=zqx)(r%QbE**`C?7JI3n(tK!@l4eX9V6_X#%Z`h;wU`9#w%lwkFY`>#6 zOe&xDs(SmL9l|c#KKh&7Oa7U2@vxj^1DB}L;;!ql!v9{yKl&c}pCNR2Ri(h%>i-O# zTc4J_{Z$rU`Kn;&G>N;@GgDdCFBkds_z&^hsr|m!)wj+6Q1{-z>ss{7AFuy2@P9_$ zEzK(0{ph~bkLm+f&RP4^uWvcemznj*xO#P*dp1Mz#qtvei>i69+ds;npmr2$<`eWSGNBUszP(>zqPvSTWrMI*%$Af?$;U;z-8c>$ZhA7yfA{5ge}&)teZH>c%g8`?<3!~TkZKS6}xQ4N3V^Ca%c7!dVKm>7vLIc zD7{hD@$7lFKSkH~IGZ1C*}3KNgddZmu9v*tC{n~U@uu`f!G67h<8NN>nA1G%Kuk~M zg+0t4`aOS~T{&0%$mH999Cz-@P5RH^xb5TKZ!c3*C8x!j#QsbzYn8~@)*!;zpuOVK zKIRH#*N>&!JJ)=f`S|MEyu9_g-C5!OtXr-twjOrPyEJ3*XY-8-D;U+wjL)jpr%hk~ za{Khx<+)KZ`^>ISf4zU+>Zhrf-cR`<{xSQ=KGTnE3zt-!^OhAqB6wSR{}0#y3>)%w z@^UVCZ6s~?7T;~&WV^~kra*h^qU>2R8_UZ6GuZ!- z?#(p&9{z9tlA5i{^mg07nfc*;$Yqsl&!ny%ZQJ^2<{!&v{~0n*=a)RIzSe&7@3*&~ z5`M<-;gC>Q2%n!BFR&;0(Mp{?!YTKR_gr}VF>if#X0F*YE7ckyTr&}{Vo4E4u7cknpYd`e)$i3Zb|-|8dv^~dslQ%PY;`Gsxmjl$g)y& z{@FXfX9yL!TCYAAcJa;b_-Xfxx}Scny4gB^*RIun-kzK0lk+qdZN8cURcVSSJmJJOz2@?uYG7ruP|sR6A_Fy!QEr zWph=R{+rI*dwPxIg3@W6qK^w=@|5`PZq1GtjE#$bd28vJPSJY{i>q{*x);y zHlDlYvbLG_1sm@N`YpefAJx^nA9q!0$H$&$+PCfb<16MpS$o=HXJ6s6t;eToEA*Go zNJ`?$JLi_Y;JS_Z1NrW%mtC)(U5>t5W2-*7I5%;X)!u#I!VO~Gd76t1J0*@9Xn8Sj zdDQXZ^5^>E?XQ2_y!VnT>ge&4wXc8uT>Wz8z3ZwU%a3fo$MYlbv2DFiz^}fki|)lo zYwJBo?PD)_ykE;=#TWhd<(@xgKAJCacGdYxz0JAlsl1)6Nt@dko-^br&b$|%bVDXr zc2iZa%Pn5J7x!nl_D{K=VJ`9M&mP&^pO4*1w|p!*)#a)0k~g=~w%qEuob~GUvznW~ zt+VDSU98hu->(0@^z~iypbx9Ed2W5%P^vboWJko&CqA)zI^Kx|)l3KtKlM<2&M*15 z#_O|o=^x%LU90r&wg3F^>s$89W$%`(nccBq$L>DMnw|DfXK$L+_=#&ei-3d4`^Gr= zAKuH4u8RAkcje34Ttm^RSI^B|w{7jWipO`7wuNUd`IHRW0jfOhN(vRqKpPQ#J>-*uTr#mi}+`caz z7jf4`ZNt_blg>R3-Dc#g(fLl5v2(J8gC~RF=}bAzkI!a4uxI`;>p#O0tG;c&yiez@ zS~~UaMX^cISwX5tjLeR<{k+L^tM2F2lm2F^6K{*Yyml-9&F}gTd*8jW$z7`6{Kiy$ z+T$Zp`F+b<{%&)X{&#VuYne)=q}LNqO_kg?tYzP%)DOKiKd?)^Va>ae=y<-@0`hO> zTFH87-&^Giw*+CPZQ zTXVI~{*imn{_T5&Kioeq<|I*?oiFzC+e*>Rr*69*y?!HaLB5&A5*C$BdG5Vldun>t zR$r}LN7_bbZI zbz7gOctO9`nP^$gLyzZ2efjl)|F-l$mt5t4hrItYELyrgUuChs;EW4PL*w+K);&M_ zQB7%`u*Q*V3-uy3cAM_{ViR}$TicJ>D`MUAv)7$2EYxQ0lgLp>FREf-sZ?9v-}9eA z!f*563HqkOtLx9VfAfi}_xaDT`qTE3=SvI!GjNx3*V-Q`cgo)J<>$IBuXlTApPZlM zY%*cfLj6Aqled@W-h3GwHdklDtyjNT{%LNQ9l7IT;?`}ue@E#Eu?j4_J+u5_g&l#CmAs($#JsCRZCsYm(2*9R;RLg|Gw>4*5h*|%-muYWf;H>~{N%r{kd&&E&Yt2#EFKKP{Y;EMRi$B$-AbE^$|D`t6g z^|Z^0jA@of(!a-;vp;=(aILj(wC|NYw)sn3SKYdOKc=GYnC`J})&+;GcgkfK8`o^y z8MMayTi?+yE<`Z?J<&9h@V$0m`oykS9iTVm+;{|x?e-zINAx^MnZqubuskjx)Av=s>6OuIsjv6uT#8dMdyyJ(CO*r_JZJI(>nSNRPts-b zkA!W%QX{+IPSnwbXA~~2Sv2jPZEVyRqXl6-w=3^8cZR3h% z%M+`MWw%V+yQgsKy@Sq7U6w4(=d$0JT2GOW`t$rp?SBU4_ig*%sOSD?SRA(Bb>V-8 zkok`B!p9-+iEuPxo* zCMR<7Pcd`+annCx+ry+--x$X@?|OBqJR&*!XokF$nqb`z21g@7)+1;5Pp2VrKa%1tjmvdrtkmI{i7&QVeTJKl`#L5m)b5L z&v{RA<6Dwk`1Y*E?{)sQ>u+86t<&=QJ3H*%)At(3uibrjb?!c2>vGRKKg$nTU7O9; zx#ZT}irXQV_esW8JU`O({Yd9p-<0n>Ewfdo{gstZp8Le^|*Wp$+QcIt$7TCJ$)BP-u$!1soK6s*`eq+`PJK;Z${g0+Mys_~WU3Am6 zT=z_-kpFW%r5%$54u0!t`}Do!ahc_7mqm9HpG^NZ`>F5Wm$$d|?=YJ^CqC@`e}=}p zOTU}e&w5iNze!}Z{hQq%wz*ikfA~6E^DCQS++J7FrJluSPw!UJTr!zUW6^>Iq>R8G zv(M!J;hH=BkktH+e`@X@cD?uBH|w>}Rn=-&p;Ng}S{EO^WRv^J=f0k~+@0&weysl1 ze(CWK#Rs?RW$#s_yMFxMo1^z8H~){{#>{V>X);TuUt05qYq9%NFWrN8-YTtNU`zcK zeB4fd--p^3aWOgJYk%T(HszS@lH4lfW;p3hUV1CjUy1oy50?L3{>Qd>Q3YsR`j>cz zO|*T7j$WyEt-WUx>yD;dcc<#9Og}$MMOyZ%*=l2B`6;GpS7K6@Zm#+jdH-qdt!&nB=es6hiwxd$toomFU{IEF+R#CU}eP;hZi0;Jxvy- zXKEaNTwG9N@iD&nkL8Nv)upQ{7X660<(?|EquKt`wWNrg{xWf8mB|MzlB`sozP+;a zV{z`)S9AAPPO8_tZuw~XrYviXzqf2_buWLsbx%F}(QSE_nzC-s59&uM_JmeHk{69J z@A$)`v~A{>BN@@>cHWiZd?2{R@Uyl^?hR%Ysn7QtXPxgz_}Jf&pZ>AE%T&0kBy7j_ zD_c*mUR`N-eR-CD;fy5{R-V4?a6s^r$j`EOx%Y!!*-Pbq(U;HMCv~kxcm0cRzm~qv zyj8ZhWZSYspPsmV;<%jh*63++k>sOLi`OU4t!w-^{eYa#)diQn_{;prkG%3+Z)0TU zuf2OddjvXLgt)7`4dOiTK)^SZ^_|MD=yze?t8d-?y7_Ig=WEUTtLm%w?|of&clVu- z{FI+*mH4Chk$uaa~Xv(nlonx6gRZ_T>% z@Qev2i-l#+9>|%dZhG~P)yK2_N6s_;s6RR@McnLG-PUB$zkAHPRvW2wNroQUtdYQP zkz{)|=VE3ydpCD z+Pc*1x7_F6JM0%$_#|ef`j$7mM`ukpoa?{Ksx`hm-=lYQ-?_d|ItjvC z-JT~EFxsSBhf5X5oqw}8eCgb^pSPb*R+S4)-F5NT-T3F(f4;xUxVL6~YQYAB$TK<0 z$u~D1{PX%z|IO5tT_2wx>Xx<*y7!+U=)?QA{I@o)%Wp?^q+d+Y%$O7%w<9SoGGBRK z>OGN(I~v^2)ZJClP?^JcQf<%vhxgxdFZkpBk$2hj%%!ibQmW=Ix&BLU;YvrL=xno# zR!b&Fi%guMp)Yx4wX|r#L9frjylS^z?f1<6l=n9(c3sRq`PaMtO;0-B^-tyQkN$_p zd-DEC9H}{~th=a~bIke4G`pnr_h-FZuGYKw z?tSOwcVAsPC|SC3BS-q2{s+EqR!{zSnem-={|EhJvHHh6A4R+`x$Az&YJS@(*=XPE zds?Sml+!m=NpFfw(m26+>O|p|=c3$qC(r%n^`Al0_3!nlwQs&(jw+uN__^%;s_gfh zFW#=5w*35tOsna~{F#27dUX7V`_Wr&jrEnu%PTfrvM#Mn_nCHzeOHHTK117!nT*XP zYD_g7x49~A&-&5)h@>+q`)ilgUArfA%|HIUqvcXw{}@RXgc~W=FTpY z#e(ZXT3Jpx^SIAxyi*hPF>L;W`|QUb>3VxKM_x90eLM0U&+X!L$*CcCSESmTIPN)j zoU6cbZEmjI`Kfi`_4Yfh@4j=YTDHrw=IiYz-+yLq+`8_^ZqE-jQC6>K^@JH^l}~s% zt9;^4uxv*vw&|0DC){SPc_Q?_k?y#8hW5sgJ%Y>)W!~rfj#q%t`bH5*dD<{uVp?*Y6N^k4hyA`J-mR;N; z&9UHj&BM2658i#|HBs%vL7gav#SNS)%>MtRyq&LX+0PqOQS9)My?d){)vfFkCAnJn zM0Rg)6Bj&p`6uYSm6-O^TJfrlT+(bSk5ApT6wT5^y@yYaw6LZ|EcwsSUs?2F zy4t>n+ozmoDU!IS^h)Bm0^@Y$g~toys^yPa>es1f)$v}o5q#tuS+er&gDa-D%Qtv) zyUkLa>dmP)sf*oy|m)f;lowiTWdTQ-_8nuzbqpA`?L?dcJH1B-f>%+mKk1j?04j&(42zf%GOfs z$Kn|(HitEPU$iNHxHmm`&aR`W%U^x_d{)tw=V74C%7@3oC{4?r%Ox~>~x;seuiCXmy%PH=K+~XnZBBr18cYc{ux!)U6%3j*mtWX)5TR4rZ7KU zwCAVLq&E!w)9RMZnZIRTw_YpHhacrfjDK{+#>H~hsLO~w4-((#{_LcrUHJsjq$6$q z)~9c|pPj@Sd*9^A_FC)ML;o43gipPC_1?;*8Otw!^z^&0oAT%DwbOp}-)cX`P5p5G zTjhtV?>m;~ihJ$9`cC)D(pmQ$T+FMcEcWkF2nsPNIwT^%)MVVC&HiokkNyu|m*=<6 zdbj^!J#TiL_T_C^QTsdc-oAD1o4$9-t}RPzPIl)>$F7L6vasOvs4a-;ePj@dD{AWm=Q6(JHf6TsDiv6HG->s})Z_2lOmsg5T5t~)w zt@6ou18dV}4UUF)X(|bPPa1;bKKbwe!?N%1{m^BzTVf|qdRy;aegDa}Sozw1^&c-k z3P1ebY-jd|zdQJ2czen0A73?}PhG!NLTUb+wae}v<>`xTuFO?t4YLumkePMpV^o$s z!#>TMcin5BWX`&BxjWi)Psh1L^VQn7qxMX@F)cD>SNdfZC?0isO{7b)FMSJii zowfRJmuwcY2^Y_l==x;wyt-1QJ9{(3reiNVMI|092o(G2eG2|uKW+83+uwa(CVEEk z>1Vsv>D}+UmnqA-zQ-i{zUV7CsiuFLrnk8>^7YnCw|>+XR@!@B;;_UWzWSD{XR`x0 zUHnr0`mdKuyWiHvXFDgk-dQcQQqSYLGQ(A0<3mzz{f}hRW4COI*jk%>Qv7=L%vp&- zIh~a%S}W8U3yjVbPV&65vO05TY)$_xJEp9E@*nx%@;|N(KRnNH_9NG+=i1ZOuDd-u zICVMejv345t<4rv-XSD@T5O`jVL7+jNw*(8Z#A`Vo7LX0^VMbdhpkh@!-eks%xliw zZFTn34)(U*<9>z5Dpu`d`f+i=ACHgAd-j+wK3a8MPHdV)-g>ppuv-t8d}epp{ z)u!xkTP&VyQvL4Te*5{++YV`e?>)V5zsl78{L`*&yKi!@qO6|rkN%I!kMi+XZA=&b z=rz4PU-*^TKK(0v^OH@_{@rs*A-hqg%~G+?rI+v6BiS9#l_!7L-(cgsw1)k1O}VmT zLhky!uj<{rqHDG_M@)O~&&J+e6QDT7_Zg?Mru_ZBP-*tdQp=AsF8q=0x$nvz;YZcF z#o<@@W-tE{d+qF2>6I;uS9i;+(5j_3GIMp87c}=d#;tX}yp+9uJ&6QY|M| z^<2+&s(ZcdobR8vqWL@3KeO$9T6g;0p8XHjPY84sX@j&RAbV3l?40P;c!salw>=2quGiHq)0|!rkNrgSn$jUs`+Vy?Ao;p~mA4pAUzJw_Kg|$&Nq& z=J#V>9=}D>?^ph3;PU^G{$8^Fd{6!NoBtWOejgV}+PD92;o16G_CGj3$baN7vVS`L zAItgX{SW8wN&U}YXZWAte)=Dc{|s$)AE*EMar{3+?fehx_Y}Hm{AY+Mj(^twk7;0Z z_1lU6J71^ReatHL-LlTy>~yP`nk5_4f~U$_NHXm^vQZq^xG(!J|vn%gf7d1iFeonfDruCVoBE->vrh4k>XjPRhZT>+ zXMH`oZ*FO6;nqc4HdThdHQFVqY{V!~dSi{`b_R}DHg%^TY(Fxe=e79S6*nR(cKco4 zlb`umJ7P;j^5sW+g>IeZJtJ`YPORtCJ;5S7R;+ znf$g551AdlWz%ofSJs_L6aO<*tlT)IldDN&l0%Xa$DOW|52;6{3s({R&es6 zWvQ>R--omJ=1LmJ=$_o^Z1Uz(u13n%vY-apo;_y8uDa0$_g>b#XPbWZ=DPi2+oD8G zdY5mV{YPc??{)KicWs{kA^y;QiHfxEP9I&5H>75~*%SFl)_0F?Ze>ckhyLd$F`Ajv zcwZSw)*Nt{`EgFdulH|GK6o2_K%T|Ma>46aK2>7Nv+{a&|rnbFG~9)^pScOSYkoR);&voY z{(Qfw@1{KZdo*{g$X=1qeP6EsVc~x~y?Xbroz{QP|Jo7gD$;;!6F4sM&*ys!=K3FI z_SpF&)%>Wg)SoGPE-jjNg!OitTgvWfDv3Kvd8aA}@YeiZS;O;j`ya*qd^x4}IkT+J z&0O+g*VN`0b{y9pTw1r~W!hA`Grfh+qdOD&?o|4%dKx>$*j-=ZSMnqMKWf}dkE*w9 z_2=uIwFaufFX%DH(9;nQQvpPn({|WmSeuj=l9K*!y$UyIB|3rAynZ|FZw| zsn+lJ0nNw$EI-5__BF1^5Zkr)$CkF^@p~`7Q{1-wvd!+Knj_s=Kf6*wdvp?#%igd` ztk?TN`bmNN=B%H+PvPIieM%peAM5Vtx_$QHifOX$-kUakIC{(D*16rR7adLSuI_AF zQjj-U;i&qrcAXO(rt6KTR-AupskS@r4Az1YMysrANiy6 zgYmIE_v?8ayZ3x}Q#SwR{TuN)6Z3LU`ft9GRdA>Fy2{(?8R0$8>)afYtSt6t`9|*3 zQf;%)yMxS zUHmemM)y&#O4c&L{Mwa{S=;_I>}$$=pmMz6?e8Po=Su`P%Sq=wy*owc<+5$L_VfBL zN8h`9`*qRsk7sS)T>f&^es^rV%dUu(6rX4G@Sn(^`0YP-CzaY7AD(q>{(`gevy<0k z$)|qOzg!r5aZT`^&db)R9=Bz8b-i1cCv@pn_%Ta~TcI`=<=^qs=} z)`j=}Gd$h+q@J^F=Y#&6hg~nFe%QBiRrWpE%f~uy`OavQTsP~^-#MPbF`go+vH@Na zJ6QcCZ9@4Q&p*y*tULWnZ*K92?*aYC;-oJ=+Sq^i`mU)x>4CE}rleVSESqNHX302b zrG#aG;hg0^o)wefAbhwe`0f^^OA zZEyN6f2;ekw)|u8;VRHtkJ5V!7r*jdezJav+sCMQw+lxLS&sznwpOUfQ&QzN44$~- z>)P6vUQ54U`sZ8!yS{Jw<}EK-`R>|W+xD}1|MTp{`rnpa{^S3szbR^2^TYM*uk6&W zm6mqtZ;ptKJfd5+%OlEt=RAe*r`n(LER}nTjO?uAD$XCZvOm1?OML4;l`HFXO<&8F z{X45)GULgnK6cfG#j39sdpSNno$hB@7(4&CZTZo8{2%HTKFVc2GEeTqzuAi;W8z*h z$6QFiGAq>e4oh9=+kJ-*N+@m@lQ?Giaec@AEz6Jax7|u_e{?Qac_UW*>z*JsdTw-?Kl7D z@AdWe^7TKSf3P<=c3=0=I?0QA`lWZBxoa~vzC3tn4g1Ny(&>URPHKWPZH`U7!u(|7 z(W<5WM>OX@sN5SS`0q+a{aV_3^iOI^;`a=J}A3dW2)E8 z-&|7NYxO>Kda(D5t-G6fx2`d|?XqcSo4wKvV|I_eZ_L?6$6rOyZrQSZS|y)yFZaeT z3@O*o$urkyo=tva-}A>ddDXsF)AlZtms_Q+eYG?vy>?ISG?+e&OiS8?KZCE#Sb!jzfS#f?V40b z)zsblR%WeX@>}QO@#mh5Qk*0W8!_Hv~}{F}37Yc6ya zluhSy)Si>V#AkiI;bu+M#}<1Q@tFPnj`?EVk8ake>0P>&QF0<8t3PGCzTnLH8C(*} zXPFx^7d=;Wf3)fDr}g0_)9<~le!1?N^d9_{MGsucxs;(B^+EO0VUh3Qx#=_dj%T-j=Dd^aa<6>yjIaE+l8?;_oBYUw zcUkw!S9ZeJ@=Vr7hh2W77kBL8rF)zYdfYY}ZvI^JS=v{pUq)9rPsQN)@|y4;^X;a7 zD317eyltJD#}+kTw?f<5O824;z4|V=d{%SOp6ElT<_I@Q^jy`Q{NhUZsf*W7y}JJT z>4xmH%a2yt&%HNaef#shyYyu%{?;&7J~(S+YxFpL*GInfV*Mfo7QtLUgS=JV&Rfwb zZ1_ie^RnX#(@aaBMljh;d1ZaS`ai?8`PYj?*X%6Y{^j56TF{0t&<<|~?O8wit5-z) zP>!h8cHH+-_s_JqJW+YGRreHo#pIsI?o8@ooO@i)f?d*r?R$4k@bVw(hoky-ukuRT zu%UI?gcWzr_I0hkICeD>(-{SzP+QLGGWWN>w>XA6F8Hdn0=*1?@!=kF1{|m z>EG66(HT|S_J8Cb(8uGg4WnuwQe`((BDOxfAWeBBMHJv%)kZ=}m^ zsuh1QzoW+U(fgKv;@53rd8cmgFtt|Mv$O5c-M5c=JU=h$xaTPuq3C;Hcg@Xb+gDa? zp7M16e+HFVKjUM+`fu&Y+@H>I$h!WMZtb_rxl-?ombU~Sv*Y~Ge>k82-pjR*RzKa^ zcKC?Yh5asKy|OFs7CHa9{C+A^VqO0H!Us~H`MdA)}rzs+bIiS9{9z;tjE@uY8dgs#C6be^Baq``2nu8}k(v-Vs;7 z{MEbd@>})5)pcSI7VTMSKl^F3gppVBCy!5$l42_lKP%N*H8*Cz*q5K?zwSFc{Z>%$ z^7IeY^3R^DzuaCqL;hHu){k$~<1~KcKVp11m+$4n4{x9DeK+k_m9bmkSIsN;I8RSa zcX8x2Ty1zT-P(e`mG~nT-51}iyKX1D_hZuw^+WeG8vPdjo&R^Gj@bT`^SAH&3q6Su zzmXSw`uFU8skyt0+%q5O_lW&x5Zrp#@3Pm2XIo6VrB|<(|KXwjTVwOANB4q7Hm+Lp ztW@!8gT7GnmI)rpmPg~bZmoTPe|q(oyRW}jirT80{QA9by1(nE-GAc43hXI8{CMT- zLyA96KJwVd{NwQ>*DVHFlaAJ}Fq@og)zWtB(!B?_R$DsAUg3y)xKpQGG{|5_sIG>^ z`E2`ndw;+FuC%zWF5}&;pP4^1zi*zo^GErkv&r9rKcqJ=y10Jnz7KD&_RU}Yvh1^N z@7=F!dk&drTRzcHPXDxcnx*q{`O*sy`t0m)-j1Dh+~I2J`u6K}${+qse^l%D(}sUl zmf7@KQ?7}Zomg$cAoSuB(7JirQUiR$XyR-ipZeOeaoqy`x?FqB8G;`KXKlS=Q zL*CDu@!2xV53N-`R?qd1^GCjqopf$?q_6YoT*Xq4>%WZ-WgkzCxBRrJcG}%5xk2TX z(INAA3-@nTKd`@JzMy#I)f(mX8P|2q=4URy^`dH8&!^KI(I=kGeYo>?j>hb7ZZRe~ z9*rS_vtI1axZk^L{h{3O!{4Uey571+dD;D2ad%Fx|FO+dbz9{_!5MEByUY@Pc1rd{ z((#TH98Vst{Nwav{YU-7{~0*GyzdkXtCP9-X3<66QtkIVug$I--M-TUY_k3whF?7es@a)o#*g^_sP9w$F?HdK zTlWv&%|0@Z^%C#W$M-YDrEEnBoS`s^(2?kBUXD`eOs8j8*_sx~ovwNtMVyk_#B zp(89#@k8Cjzst=Qms~G@b1_1P>1k@mf$M=*3o0h^9)BMg_rmOqopJQH{j#OK`j4*O zZ~x=DtUm3fzsULYe6iU@-}8h|e!6EKbh`9e<)o*Iz88MRTRz*nMW&&)HuKm^x9qcj z|NeXWY|V6Ufm>4_Kh59$PwqcMm&IT1M+(>fsZ}gL6!3ELnkd$-b2ewS9-pdmxjkQj zZ}(JDncrSpH$Udp4!T~Wxh8X$-P*s+tM1fI&+*6mEvx9h&_t`+0Vo%O(%XOoVR$tDZ^8CT027Ek_k zE%@n^n%j~0fBoC{IYQHNUF^I4>-T?tlko5Se0H1i5BneWcc)(0-@?6~+k0_lRBg*F zw~4CL=UjV~c%b@rTWsUGM-P6sxhkx?#=H1q_rZN?Kkk2UyCmy<$ldq4Zs}@YuXrA_ zl_G7gq_Z!a<-aA;!j>U0aT0@`XvaZQ?{7Ijey;fwckTU}^``bb6|>IiDqRko9{6F6 z(NVG4rP>y?Ru610y_?C@rmj)=eQ`|B_x}tsb_!Ycd^OV9)l1w{N^PA(H(t_xXR2`Q z($#l)LV_j-BfYiWpSy5+$Ai3>C%J*1r)+NDjlMqHU%z`_f9+PI=CDc8(Xrp2UO%5d ztFB7kR{rLHhIpxps|i19OOqeYsPQkZ&i(M{bnv#VyS87>GuglK+y+n4L$?nr`$Q#5 z`K@9$lQ2A5vh6>^=Dv;k2OpM}CqAusW$PS#ZEeZ#$lNp2{41+IZz+s5;8FFMpDwgU z@9}fKseG${hi1>R?OV7Y_EntN){ky8A4SjQ`p&56_LE&^i|UP)jFb(2 z-m$Bz;R)TV|82oXmv8fA_FgerYjx=OZ?{b^da0eBkN3ao^qk{XX@d!S>tB)i2Ig-)F0s9QY&s!{bBd zRVvfM-H(c0pIjMsL1E*@&AYQt+_({^dQ( zkLf*l-v5WT9fC(-IL#3+;%g-Q(EPc)89=CH}Sbv)mq>6{^R#=Tkn4c z?mESkSzqEypZ;fHf10qUOOpqEc;v`3U-3B$%XbyeS$J^yMF-J+hyOG93;$=hUsBut zZ=U*phJCNjiZDjMvH$YX{_l798}%pP|1)>=ujK?>EX`*WP^X`DvFv`-$iN)4vZ(@5*~F*S5o6=-=6o_kT18Tr=TK zx_~B%HZ7kgYu4cxmtXD@ z@5sqF4ZY0r>}}XBUb}m)6Q5-)a8K!JimsNMvf%HvYtyFx^Ua9<@@4zHt=7vXzm`Ar zY43jq`RJdSZw%_rZ=Y17{V~3^lwV}+wUz5$2WKA+Yxny!>E6?ILXQj-jJ7>-?{(vD zI(^LX+1~KOAF{69GF=k&^zFRGtF@9Iz4qT+?V*x0FGzh_m}5^tO8)w+pXEpE#cQfR z@;?x`wk0?1^kJ!Evma~Q^8I?aCHi&QF6OYUt+5J|9%-(fI?eH!vtrSySR>mya{?XNE6`Xmx^UfT-*^;ep@o3iW zr(LfX{dDxKijB;ETDxVg?Mu7f?XRAvzh0g1{WGsN)3v7a!{o>HT{Ze2;zQc1mtJ4D z$Na;;d_mV0uY-Jxi%UzUn_q0SY4h2Ybj#lD$)CH1FLtF(UAF#h@WZoZ_q{i6x>`D) zInIAY?5*2tiefh1xxw;ThWGpy8NEjZ7Rx40xe|8n*DuZe$0v4fm)Q0_?DF-={~0E` z@5VMv4;re+g)65YuW$XQ^kd_Km%H4$AJy7BZ0LLAzcTXGESFiW`&J6|PrDb|_v~&< z^@cMGKRjSf2-#pcU(`z8^GjBgdtI^lk$%OL<-?Irj`owSdNXJ zney)PG&1hpJNI{FM2!D$yNc%lo3EAj^8NO>r1w=jdaZEatYe8L0Up7{M@}7ePMCU* zli6}j{4Hy*&C?(JdVMLrqSwZ4T4Cld8!Pk9t-88-`tKr@whQU5`PgY+^}@5@2=Cp^ zfvfyZuc}+WbYIrB8|yaizH2{qZS~*pJ6~RUQKz%Bp7X>%%`Mv|9ST<4E<10Mm~5nv z&TeHPueUiYttl+l^CxGj2xXu7lla@@-jCTw?NsL7K3ew8PUfTD`sJ^q*7nSDpIfJ= z%fiUI_4%AdB1*UHq%^rEJ@tC`_vV{m zR^F&{=7^A|%B1Y4_FZ*G*Wd0l`myNW`OuFJm%ro-ya?l7ck`7#U+cmn{~6NF&9;{% z1%8`8HTC=q#VPu?lusYsFzedMFfaY+_n%{}BCAVYEe*b5X{d)iD_BG4j zJT0)vn67^0)jjV1BmIwV{IUN#_xHyYnPK6`NZ*=8T*PuuhV+3wd3`VjtR`+o+G z+W!n~dG4M6u9d{U*+2caefr#Q*4ZblelE}Z&#*1;{r1zxtvA_R5B`|n^+*0=wb$%` zeChf_p7oDEFY^>R_WqyXPZQ0a?mQ*-zXH3CKi&E1KSQ0Y%=Xi>uKe)*?OEgY(Z6et zCf|PX*83jU_~t)zUv?wra6!|jbDt_cyf1kDd)p?3r+y#xrOj$C@@5`gyIy|Z#muTD zm%fI*=$F4K z)G3Jv6&+t(E-@!c_cW!pnJLG!h(w^{(Ju8e3tlFupkizxN`{%uD#~x)oHri9T zc@^IphZ7>JOr^r7ADaAo>;84yrFO4)yZ1kXwEk{yIor$+;sN!|a&jL+cAYVkn%sALxDuP>>DY{M_m?)QquH@d-L z`+k?+5wu;q_2BktPc}==Qr|Jj-e9wkKx%)~Rkf!U-gjR_h_j}DkD2y~iPP-O zPe~SjPARMx+n>JI{Skl1AIDXha_XBdCVy0Gm?wQr;YvyVTc&HPJ{x4GeO~-2SG`cFZt}rNKVExHf5h6~cD`Z3RU2uwZqqjtomVQ%efIBi#m$MQR^3ir zFS#gHNWfdjPxtuLojJ4fpNs7ml~&!pSWok=#<%#Esr#P2yZ0?N$oBc8{5LO`S3EvE zt8DJ_n46j9X1BwtPro&Zlim}(g;S@&CUtp_z{OJ$ZxZ+}$A(UkGiZO*V)ieo{t$cS zf>n~~H@=Hkb^G=_kv*W|Iqd{v&tb`1g%8;ug?HP@SA4bcT)X${z3HoNiG5lQ+De|@ zBsrBw&U;N}-o!g%^KPo1`}|zu+p~Qun^JbPStgg3l=7@;*K(9(k$bd#(xmcf->+Xg zoBea$^s6!-3#SA{NBes(|MB+Tt&F47OlO(*UNN%JI#t~qXZm6Hx60ig_&aQ@i&y+u zlD*5l<;&|{^}}J@%m2>v_O6^|^LJ6a>7SpXN`0oO5eEW_miA@e$WPDCwtVJdWCKaz z=d_>L-#mP*PIvyX`8?~ldcQ4tr7!bBylUElQs4Nu)3yh7b9a~irLaHqXYJ44x@MpJ>(l?(SAKcdyXeP!ze`_#ytnbrUG>U5e7f<= za^shcN*h}LGhA`_cwA{_OYQ9{^r{EvQkJ?H6o&IiZNpE=cRnzExxjR3SxPQ+Le7fH|{kqDwQ1RgD zsrmYXozjIqO>7RL();3G>hE3pb8GqTSHTxGOZ@)L^1T1f-?u;ENA+*B7xTD2*sWgd z|Bdsjuh+BaXIDzjYH4%4NzV|wD|L!-M>NMHmypAT4<;R%9BEj0uKZX1!L!*107^GpPqeE#?Mo~ZBlxjao1CC#*TZ`}f3HUKlzW+Z%n!iNF zwri#vuG(1l2AxuVw)W`Wv)iW^v(K7zR9bn*&8ZBlRhqMRZBP&Scj>M4e};Cq{rvm0 z-)DF~>|eXL^xvL+uYVoyckljp|H12ePTf6by=&Y`ch5Xz(Cz23Bt109QSY6SL%2$w z$_L)EkP|VddGbzg+0+z%xvN%p-Jbspr~m9XR9iARI{Mci@X;@tBA}K;@jSa9wTo(e zKYYAc`XlezhTksc-rr=M9vw@WrY_|6+H_B+($vy0iNq74&-StZowv2VWfp(O+daIG zBiEM7CdW+=&z>)|^x1ZyBM%<2^<`gpx|8M1CS^SB7g}E7IR4 zK78%@huv&>{)!)c=Z}V~=?XjbyMDA=y3FnScB77m>0Q?znM_W$`xGI(ReBDWbk<3| zRri~`KX~1)efP-o-ur#~c5VN;`~7z*d71wV>05q0TyfLAw)o-iTE*P6Yfs<#QDnV0 zG&!l}>XyBg?l-qD)bQv$$EnbKR&D8r>SLGwr29)n{y2U3?ULKhqVwi2&YCAR@0qXJ zeJ(w*dnx-n#XJkotQ8huTCc@nxBTHr?UD{ql#SGmM{C0Ro`L$YhC;EHQ@oPGC*fw?%Lz& z^;Z15T=n18r@74jbT*&gT6AB~WWC@I^~d*{N_XpeefIruZvE-bTVHm~MP}ta)yHY)XJzhoE&7sM^K}3HzmX<){U1%P4VwHV{QA?p`ExJ*+P5dJ z{^q~j;D_ejc5-*K3ZpX1XC1lKH?f?l@+_0>rF*M4|ysU;bxcU->jn@`v-I{d}&E=f+fQcbk1|pGJ1>_0Q>CUN5`$RY0o>Fs|%kccfyFX?hkgk>!K=l2Y#$?l|DG@%&y$O%Vs{-b^7$| z)yv0y9?5M#jACQj#RV%1=PobK)jqJ+w0qX8E%o2t>)o64>e|)q>z=INn73>F+_&|A zSYLc`UtHt!!*TVG?MHw4ckQu#B+V-wz4_9+EtzZ^7lZH0$St_Id1{9*r>C*d!|kF9 z6HXnLc>JzD?Y-Db-w)reAC9xR6zu#VZaVMWzL}5m`A)CYj$QQTcecd(+=CO;Pc3NU zoTlAWWUK$6x4+Bee#<|JZ6B8&nI5O!ced`P%DeR%Dw9?#>+>ot3{8_U)|UJFFn(*? z)<^!l*>QT;RCat^yKesSih#I^sCsVG8=Lw=de_ON?fX<;zC~l(e}?|`ciwz9{G7JT zH?Zu*?D}%!zp>NH?-ckgx?DEt_s{U3vETfcwXgaU_k%n4qqA>g!hG*brN)}mKD~-n ziE%GE#@6C-LdETK^4(pUlDAK%vh#MmAnIW0e$^invsYwWEpL3kg_9ZbgvWvb8Jy{d`QTKbZok+#`_{$=8ipMLGs=^c{E zjdv~HpX$?|x4I`$(=#^m?{QaCb&F$q=UzPiCt4$Tb&ueqD4q{1cN|eNZzgH!nMEGGyZ4~`w?0BaBcPxDgVyW)f2T&uFSS# z@=et;IXcZUXIf}@XwRCIe97Cp73PFb_;>Es@{TR>U8^6S{(ZOVyYH1fTC)zD?Oh}5 zxoobUm|>-`c)pT#>y-H_qM@I5xIVZ0`S?D+eOHa@%FK(lxBOLH7l}o!`IXZ5`m8p|R>=*t! z)qW^W{&MPvfBTnb{4?5cT|TZmXxoJ_Tf>y>;EddBhb)70cWl4a$g;38;P%OLW}ZovYm^MKa3YVD>KbXzbpUQ?k$}QJJ-Is9l&Hgd(*$7UHp5Fg*Z>zvF7&8I<+6d zhw7NF*Ep|yU2b&a(B^y5(kqXAyP*5BV7j#UoA1RZb3WWQI`!~yTg5bYib{58S?Cqy8YDd)wml!o@G8K3x0Cuf?{_NqL%g zcXf4y(IbmV#T*_1kM8#IsZTg%4y;USIC;)9S9Al@) z84)25h9b|m-Vt?1(I>xfu6dJWbmyL6j-}R$H3e_ZFy3N%_guEnL0as)VC?PVHbP4- zcrCo-al>fW#0icv0<~FQpI7L9m9?|@^7?i3ZC|5XkBWO`m(@=_P~o&mYh$Rc(gOyz ztLH^C-By0vy{@$XZA|^b_vdeZtDTd4_V~Na&+^XM<=!}c+ClcN=<{itv&=TR+Av4u znu%}CZY-H>bIku9XW2^DnUy^g7OekXeKljzwRdmd=bgOzc#?+Cwu{rI8vSR;nbQ#b zV}JRET+5#xk0loHyUqG`?;dCRw2DP-Ix|(u8G~hus_RU{eLwO|U$cAJ{FRaSt#~%w z_M0RsYkA90+x(E3{mt)EPg3;)XZ?M`RyJ|!)CPr>IZal7?=M`Veg3Vj^x;i$Q92uA zkFSnx*(gvm{Uc#jaI) z@To36f!n3%Zs>-GyMzqqeZqPtu_W@*pBqZv|K!vETYd6B!$Q^k05{#CN&XY69c+2PJ3DwOpQ||NG$<;*k{8)ASZDzUK;es+Ak4GE#XUhx4XV^3SaZF!)*!J4qNBr7{Z{==Xt#nrB zd!B+^WLYTBs&_xr0?bk-uw@_G!@E9fKTn-Z#d)7?mrU2Q9R6_m$cKt9Tc&pvHP=4; zJ!#}C({w8{sU)=e(JI-74CM!+2WD*fxqgwa^_P9q_s{avzi4-&XqVTm?WOs;xu5c0 zZ@s_r%iiS&&a>S3As_mo$I7MI@v7dvfA7Ti?Wj!InI6e=uX4hvP5G_`uO75XH?q$< zdvz;+>wM4KAuRqA@?rn($vbLh)MSiSROO& zajO@Pi~CXh@P3C)F6dyUIJS@T)~|eRzWwzY|K+-Cg509LwwPZ zJztfrw=Z3&s*E~kH#>8|-fQ#IfBe4v^X|=GKj*u}{X6@gfu|<);cac>#V76h|IXE$ zFR}6JyGMP`PNf-{1epZ$+ZmmSVAo+x3YT2BI`&8G!}Ee!^W?6qTf6Sz6%&E5L(dKe z?c6+3S(V%6El)>kXHVd?KHn3!-9oOPk{2ksVaJyF{z3J7x$IrM``c}J+5af|rmi@h zP?{U#S)j@-;v@FT;dGYR)#eqE7S407-WRX<9ov3v*7a}OyG(PHAGJvBYZY(Xqn^9c zmv`^mITLS5FVdO1XZIAn@2geTtVt5=G0J6IXIAvI?fT{a3{2Vn)_1q=^85Y5=XSi@ zx4%kX->kW`zcjD?sucT?TJ9tFbgtK!ChI=9w#Bu}`*!wTt7liVye{b!xqUje+Q_iY zLC)y(r+Hc4@g4Vs_kJw-C%XNkTd#eWg1=F&kgk*3-rmBwXBl!IB<~b`_MxKN`@{LS z@#}BCx;1s#^wLNCJedYB1><}+n*P~v=apKcW1{3I6?k`|^{2J6&tee|TqpxlZ)Q`$u}4*ZeqsL~3f=;zx6~?(0vs)-PK;eRugqvwhDN zFF*VDUX^@qW}Wt>t!s;;F1~rU=dHV2#o~SKr%OLYc^v657hq21u-CMBQIH+CujKEv zlnW_8IzLXSNIqh2r+(pj@a1W@19o0na;2MpgZtFzm6Eqoc_;3i+wrvc(}P;^kgIE7 z%)htJtDH+?(S$=?8Vi{4V7}-_{)c_pA6;)x$I3Z@8+rhUZksN>90a z9yc$vDJ&LR-qYL|bG&m)eVaVbAKQo8`C_lP?ed$s=oQm*xA{lwRA$b8n;4`2e%*3i zYmKSjR&Rd$vw-dD@fG2x>$k>jFwyU_6W(p7@Zmp0&hs~4=wM*^I++#Ms?EUw<|9t)9X4byeYk#B{ zR_qS_@N9E^?zhu>`5vy{a`Mt6*N7z*^OSzLaXmP$%wv#v&fx^xp3leS*nXJ(XAss& z{b>7bX7Z!EX^-wbl2v+sL?~+AS{qdtA=`$TNxMok8Y3SUnQ5>$cG~e)+;;ph|B!t1 zF0+NQ_uKVN&uU%vzqDnpi_La6k2ik~3-54j6Fhcg8t0r6mPdQkkJ%|*{lob2`6Jyg zaq4ZCkB0wKyvWx1P;-s$)5%A~ctzLvZ=K6?W}^8m4+A3|pLOqdn3h<4y8bPD@%?iz zYkw`j?vk|htoOXw{k-+u_fI|Fd*kJO&QkkcV_*IblegiIbZ1u9Z+!YuXL`_$js3|d zMVtLjGUup>drW!Q=dW%(OaH)r<{wqHM!EJbzinpa->f@zKYQ+;{hM7)0&1UI-kNUA zS3LKprGg;n5WGLZ5A}uXF4*%YUi_1--cj1CeI!@*d}GP^O>G;eINB7&p9z!SqO>4+ z?kA4>$}*4bia!`fe&mre{J{I>pTS4BMfrYR*TsHK+gVzguYT*xThDjKB9lZdt1~2+ z?40K7-95Wo;^slS>E3_##ZRxkt37Yh{2$SEv+mmO{uP%0I-m8A?1$|QrQN;Jp4;|M z4w}Wk>F?^0tM5(4yLfN!SsYb6z=w%nQU(WtA%Uim2d%|>HX^tD; zati9Lit|jaY`^#~KY#v$OMBhL_kR67?LUL^sV(n$YK(XMFkO`y9@SF($A0s^-Q_#4 z z)wFV)Ypj^ZtZ4ztTwl+gHbT?w!sWlU!%FdI8@;{@Jp#9o0^E z<_Y(n7QNIy*@F3Wk_W@Y8~@JN8C?3q`{Dkh`rfa5i|*yK>38UrT5SCt9W-rwz}mH& zV(uQ6ss7JUu6ioQ;zoq*k*4Cwp4wNstK2_s*MENY`p>G@i97YvuD$(x{n?+;UF9#A z{&jxTf4HBuV!D&}dGWikFL&AX=c>;v=I_uqS)siBmd!!y!#0VJ5A}3<#xTFip89cH z^9Q>}@$4_^l&+ijTJdi>$k+Ms+ehulsQBn6o2g=+R<5OT4!2}ws=^I_C!c*Jn}49_ z=8YfDVIS?AX1njdQfm2Sb+^WUhFTfRt)7#fJrXl$Q%n1Fc%#9hx0VV&-yi+YAo(Lc z>hH8Iy6jOOQ_nZ<@h*O7AyeV-OCGzOR4rrMGbpQ{TDfT>j!&^k?m=SJSR<{Bpi~i$2qzkSi)5_N}hWUXe8`;`7tX zYf8hn@7eAtu<$dBkWQ1iyH@DFaCR2K;DaCTD*jkA&*hL=Y#6Kdhd^vS-;I+^VW3P-dx?3$!98p?k+9e95x}(@Yu$-#7Uf@uf$ht-;B}W z5i5oz?H`N3MMmoFsr<0$*!hs{`>*V=emMKLzhLIKYe|w{BDtberd?krF@0L}iRUxR zTUGKD1V4-NW<_uMD|f$a-+uL?OY){K|CW8R+j6;v@!@&S7j-IIH}&Oe9gp0#>&jNm z#aGmO|GYewHZ|?Dx$|X(W2b~AW4Au5ub06GsK;_F17UQ`JQ!K ztu@jkdDd&}nDxprUt{8)VvLw+)V)iL zDrYa4%KLOj(1M+xzOO&Uk@Fzk^z-|UQjXHS%8$}JOx9+5f3$B`yJDl$d^F6gNmq&c zM1Gvxu1O|ok;>az*IWwR!K8H9q(U|Q^scU-W@|s)x~*@rRdcQ0@BMr8Ki~VkU+w&g zKVttGIQ|$vkja?!uf6nPy-=);?~1#x7yT`*?i4?rn!R#q*^UXTS`L<31X%F|*LZ$7 ze;`iagO8Q&m+iaKnrqiacv_z>wY_~WhI3JL#qwGChJUtYJnZK=z{)i<@Mz8Nhy85c zAKeee3z@Fw^}8Os)3-ce((5DtWTi{qW?jqX&bQTh>oL7*Lz_ipSi$edWzh|uqSq># zNjo^8uRAvETX)?olYM_hubf8(FuzHdYP;j4+S_vu}#kr(UR^=z?d z&#bvmtWp|wX+weGBpQk!=DPtJbtI>zdod*A+u{kI+Ebk?NT`}i)| z_^!+-|222{t#>8fn`;xI!nt{#sdt~b!4amYQee*9dsD)~bgL^v<@dg={%vO;ujh#R zu{!=@&YsA`lIycXTc2&%c3szG;vUsW@`gS)!%G=f^UPy&P`cl<{txS#iq(hbNq>~v zxTIgmxm<4jl1y=_Nr%fmyGZD4JS1b&kox0b+)kPBp1Ldl83g~y#=VaJu=4AE!5{I5 zey#7&T{u7D>ek!0tKGI-o)*0QRA!{l-F|;-gFH=r`Aui4E~6QY94yxm3?#h=bzeH``4~{zrx$|q`mv+ef_^)U3aT~-t>>} zNAv-!qsP`RxgCFUr7!!Dq3z>0?yYKz_d%Aw&(_QW@|D^BN$Y0sgneTeN;m!MZPqMpTw{7h^ z_-J=|BG2Q~hD(H2vrLSBdc)v3!*%&J6~^BxcK)bb@M?OU&gC_KblFy#Elhl5VN+V1 zmA=1JQ)PKT-`lv#u6IFkZV5)(=l?jT+g^VY{-Mj_-Lc7u%d@1kD*E14m(I+YCfO8w z+Q>qyW_9bmqXz>5Ha9Kk+MoN|D#x?ArzeM>eqR2vF4xY}`ls#G z^9#iv&TskS`5;%=ZNlBxyI7YV-L^d0_S)vsO68Lxiz-yj{k6R{ctm>79q3`6`e>(r zul|Ru$48=UmmfJV`cf)v!Hc!oi(mcI%(at#^4Tul==^z)ci|NWgp_+ex*uAbf8aj@ z+dt+CJ&l^!#5F$(_DJO}+qqoc?%JnUk7~P=K9x808cz-2}jPE}i62Vs|$FImuWpAvN`!=c9FTZ(VQC{B`gC zynAIYe8f#m?w!7L{m=GK`-69UjBl_LG(Tpservpt|Dr$6*Cz4n9lCt%`>mAk7Y|Pq z5G%NQY?IQTJC!Li+lnX8TOae`enXw^>t))jXZz-de)uxa_w3fT6|cViz8Q0{RK|$+ z#4(j*kpn9=_%3(-Z2s2X9J=YFc-IzPwf_ugL)fZpEB?BjaveeX{MyrqimI z?QeE%e(Qd_V4>5crfY_u>(!rrzW6iy$>s9yNpJSO|B<`z@e5h*hc7SHsZ}gLW}LP8 zp7)yDkvE-_93$2|Y_L2a-H}#4^~N@KY4@CYo1Wd%-d&k6&tw*7z>_P;htTaV%~R2MqV?wN#;o|u>2<+@t;Am4$ys ze0Mob*0HdePk%?9*>uc&vsujn28#!Uak9VlVsE~_FZTF~jpgdd zY%AXBO>4jWUVb^}=$0~}+6AWT<{R)7+X!|y+}?2bjAZDE^T{8-AG5MQ(%)#6-noVQ z;mV32d7;ehQoYx2&N;Si+pS5ur}yMA8Xmhit)?fpx8EeJwCqd+t+)xf2JLJ70+kKV18(=anSs@4VylMSgp=B`=`47d^HA1JdEy==o-yH@vV-$rR}?%enz!gu5D^WIB)u}*E~ z@(C*yr%pZTqj$-`@UxB7x8~(_=i|4??O*p}y7T@I?^YKU?DFqWi&*n!YxMg!6^YZT z9nJ)Q->zv8+jBGFhwhcHc7{!-kH&NE(f+7?Nb1?#m6^NJCMW3Yz13g5W6O^0$VnbS z4l8blNuJ()Vzb(T-o2{Ksk)~7?me!X)cq%P(#xe=oa@tW@5`=Qw%>aDt9iRO-z?Q< zsn9+=eILWcgpY{|kGFiLNEtFw*f}a1%LyhwtY_+^J7BH>4|1vCUl~UZz+xL{ro3A|FV(Mi%*sr7Mi{@tqxcvc}-Y^@r=b=COYG-j$O&-F^FYwMmz* z9ge!0U;b^zEl1Uz{grH?pHfsqCby_BFrE6_>xc7^{R01lUfBtK*i*=Tw(8M+zg@d_ z+`1SQX|wv<%vhd@GZ{Lp-h}boZeS^u3(fry{>ZKU;rdQ9VfSU(RZFbCF7LI`tz7D0 z@;dWj%bc~ZckP>7QXW^hJyvGg#K8OUCvSg@ z`e{AyhWs`v66vZ^1 z=*FIK<>U|NkH&Fbt`n=6_5N`Ck*cptzpY$V(d@Ks&sNWWw^z8m`{eZ5W1?llwC?x) zcc*F3N%%OQ#g5beqx|7`wo>lby3W2Ij?G_G(WX7^g6YXG?=|0Q{=^R7&q<}p)w zLQhSL*5sbGVv}lCKfm>7{fAwbZr{6QuL2^+N45F~_c3-Kk(0gtN07gzXArfCesI^{W%K3B?wdAW$<<$LD!$V^AjY#;>gU8QhClsyH>x^&*d=~0 zjD1>}c=79d{(l#~|9WF5zx(PQ`$xQdhka|^&Q+JjT4iUhS5Zru6jIc)UAtN6MA32u zzU(Wn!Z!LW|9Db&%l13#f7}7B`6d=?pWmnSCoZ4y$85(BU;CT?I6l@*kJz?rPuq2; zY>`h3w`YX8mF?cySarvZA;|N2pZ109qxKp98Q7vK&fPn8@9~2#^M&)mu07M%ez9&% zf>q4=&2zsCyM27JO5RA|jah+lSEt#UOZ)#A|Ml+jeN}C`=6qo;&n8FJ&AZq#QnliY z+`P733RhluQ2D~FPw$25E>?-R)uh%hJiF-ibhD_`x6@{?%KmoYP>#v(+57e1eDvHE zs4ez+;c0j2RX@9T^cAjrDDrW7(;n3iw;lFhKHqK1n9bYYQF^duVs}{Im3KR}{xg`| ze(EqIe)jj-{S4PTYJ7iKKCUf(^l#_tsEX>qEBSg`*D8H?xvqHiVY}O=E!#i&bk-(a z*4!~QGEY+GpfT^qHx*qUH+zN!{x<&`{^CEwgO}-D`8V&r&;HL4U%r0YcH3L$AKhJg z{^-en@@E&k4l_T!{PfC$t749F-HbgdePj~H46hUwlNWXwS4t0Btd3$Q?o6xazdn3S z{&!`T+IDm=CUZpSLG(`FHN$q2DRp#d%(}^9~2MJ$|=SJ^Rj~J?Yk;jF<)I#J)V{UpxOF zt7`qOv%jY6s+#Q5UjK*Hy7s&KWjUE2{SWmeUhV3exu9aP>vdCI({1sQFL$+=3rB4$ z>})~qZ&e@Kdu${hd4Ab;@#|jZhfk-?O4xY0#;$++7{7U-n+H;Z>%lJnnWw^(+v>6U#i>jL=AbT7I6p6aRjSwHa7ALS3z zJO7E+SbV&^y7~Hhf2jGjCO#u#@wTRG_wGe6`sAjPvMns_-UIR9XM3)$ zjw`>J`)s=IzTchd%L~*r+wAyr>U#oUthkY#_8kR`;EE^JGOnO+xzH9!Fz)X z|4#63QefFLKVwHrGJC_)84o_j)WmOTZQH&oL#$5tLw#pW&a7kIxsN#BeQ8eS35Y!R zX5*Im*}EDyPfZF9?Ku80_)pmeJB<%p_jRuPqd57|w5heyHoq!%Zs5$eU`{M7)k&5& zuK74E_27Xg;e8dKADr*lI=?ke=EK|PhgQ@$e^k1CSnOBs^gffQM^0P#&28p-`%!Ts*3N4 zC*4nXuM{)jytTM^qXok}vb$$F`6B?Q2r5?{P0$&9*3^-QU^l1V@Y4 z`^H`#yE&yJS3F z(^&IM$5qu$`}b~N*Sl@c+WON$(q+4&Y->$D)t^__Rh7K$h#K`XQJ*NY`b+$GIvpA z%whS7Ig-sw+^ZS4?o3*^=GL@pf90R1zOIg5wK7wB{p(-XFZ{AUcF*Y2Ke-?7VIS4D z8b`m3irszZj+J8Bt;LI0Je@Izfn8;S_Sxrumu*?C&y)SD_j$`6|Kzn9+oy?z%Sr6$ zx>zb+eKWc1>V&_GPiVYV_}OzwA^VsK5+hxc9k4F1S`yx)G`vH$R{gFC0mD(bCY zJkR<|Q^=%yX({ZdBE1q8Go9d9+VlMIelESmTR(o=zP3hb-<2=9sV1kT>zT5o=FWY4 z=VZRkcOkz|-bEMW@88+1Wpckv@q3Yo-~Q=${xif)dwEs#^0WU8{4clOUu^3h^W%H| zLtD1vfgi=2|F|!Vxubga%&gdx8>cSVO5d*e&X&7#bI++&c~^|)&r-OTp4@7oaJ|1m|69$zEC0lQbo*X4 z5&7_Rb#}Sb@wOWEQX6sZrRo#9d322`=eeavs{Utio#<6LJM`nk(B~i2d!oOkf2?m_ zU!r|ttG!?hui9C)rmO3jVs#AtOUtfRcDyOuQExE!n2GQ6O3SUQKgXNLnuRgl?sE?g1F<#3vW$xyyf=X%jtz{*_wGOc1**iMhiyWR$g13b0;;qTxRO z`KNa)cQ}OzpJbl(xXx<6WX0@bRq^fi4`W{cQ~!}KQh4u`(Tnn3NoGq6Hl^yOMz200 znc%zXeZ##(tvwnEU;dd^*hl_oe7u%*%l(!OC)dBYwY29~{t;fcSUV4$+({{u9%s$u zJ@;r{P9lqIxk2t>h9;>U_t$LwvdjPL_WP?iU!Kjo-~V;r-|e#g_?lNyx5j2=`!XxuS&BYZq59Wk>WGkgA%ew20 z9#u!WEHc|IJmZ4X!~LbZEpDx`To-&Go^Mb7;veT9zMEP3(DK2>KZffo+J3!`$$lNo z^5O2^wQFZ-TnpAbKW|~P(p1|SsZD-g9{sv`|EYTZ-pGH>( zIqIc+8L!^{c>SFTUWM)wKW5MWGM+Q|xUy5Nz4z6v>{fa6m%41a^QzzWq<5ttgG1^L z)s%xa`KQfUdaK`e-CEk;reo^=BdpGF#}&P$6|a@A?x~(fga_55Kp*+hcF~Z{>rvZilSTa;1sgzqK*qM8Z_%O0I>Ti+El|v=kf= zVpIvc{Kx*I^~3XrYvX-B{P5$~0e%%kX;7j(nxlggTv zGCdP#Zm+prHtpZ7ShLS@Z}^{+`M@Kba`o%)h(t4~nCsRxQ)X;gt$AXSkx${S1*JM6Qnx?2AN{L* ze4gT!FWuVSzcY1?^ot(7<)*Z!&+^_qPo-OL4(M#$mE`JXae_tWdi$S(ADoZmWnOQe zW!9VfC}Zs(ACqSv+19cxPUP#hzP%-J`82h&&*yABvM@|w<4(bL73S#q!uvR0OVuCI z)))9;@A2>Q?7&^wyT4_u5o0XcbR;2r#oPcJkG99(SFA6QPLr2hq#d#B*VdoeFRO3w zx?CHw?N0RTl>ZE8tG;}ybKmuHp1>uOdV$wgZ$aUNfqS3!9BnBV{8{8-Hfe{}@01|7Cox?w_9_1O?fWD1 zVdmGfe%7-)CPm~f+_I_1v#``=lTN^4p-GJ8<)M-a#giOZ`Xzrbf7pM-D*j0NN58EP zz5Cf!x3?dgth)8uO4-kG_fA`#F1B%1otz@sWzgj%KXZTCjOUAf%zyZwLA)mHhvV`X z--q#U`h{Qay1Q(Rjm57yLDC&>^PMY`9&$KbI@CI464T}tNd_Lb7I{yfzHguRtn2?7 z4wT-0vdw7O?fphyzCPW%*K*zVpL_3~et*l^cjJ%3NBUy&oE7sj_gEI|F5CR@$AWEE zt;W}cZj$0_`j#=}WrL9zv&7HfO`A5Uvv`f3;Nlxa~@7Kb*Sd%8}!&tC7O-cy0w zl{Y6C>TBNoy|s3U(CzS9A8*UI#c6&xe&9bt_r%P7OjrAsKGyvf72dk|%A04qH*SkH z)0U8WR=KXl=l;%nPtzuDILvy&^isv_hi$7rN>4Q?shPv!39gMxM+2pIGew`dzu&z5aeJ}xWB%jw+pg~l*;U_NCx5@rY!TW)#{!5q+Pb5V)3jeOFlJj zsa;w&mG#8kPkrsz>%@N?_xrfNMPcuyFRSOW-Md!#<@%-d%3YQ@Pqt58SkZOly#&|A z31)^d$!vStK7aGsdv(j)AYS)lorSe4=H@=z+3)%3g4tr-E$vyA$GPQieqcnD6!l!S3E6#5|z4y3v`QviQV+B@$vPzThF1i0> z%YTOEe~G%$**|B+KfHGS(|-o#6(rmS!vIsep6Bl(JG~#XA1?3sqo27nORBv6i}r2n z-)p^Nul&(nk-ah6L^n^_E!y+Zr_h#78z(t$;rJcC^E&TG{X_T6KH7NZKJ*uVIZtBC zEKc3`V*eSOta|#S8$KHB=x{rb{$8i6$Y@Wo(6fuje_i~y^w{<9eVbkP_lI0uqp!4Q z`<0{}O_pJw&Ll}5)=+%>bN17mCpE{LEN!z-*=haweEdHH-=4t5@dwr0w(@glMcwV| zXKT~Vv){VT=3B_5kojjneU?j^a{S4J}Sbe~d-S zriV=z>nv1To-Mnv%i>H+voPCD#Zq=_vj@yE`__M$eB6A#wEaWNZ6B94s?FpnymjNH z)cUsNJE!|KwElKz0&lzOu$;Omi6QiBeC~(#x9I;2Eq4DI4x1|<`OlDTrT;Dd^xyfL&;6F&`b4(o^7i`M=H<7aKAwA1 z^4OoQ->dbfhrhmdeS6-i=O2C7on0TBv)Amp`_H-856$oX_MbsK$8Y(visGotUnCcQ z&MED6ezVW@SDyAR)2{M~R_Qq_rIWV@#$G*pC@R|9X1U;z)*HVVjIvMs6Z^xq>x24{ z+~i~Czvi{;ep{IS%`)M`y~e*2MNb-a@*chQ^V84IAGdgZN-*XrKEB>t$Y1!lQoDQ5 zn&k`GS1c-j^4(6m&g%9TsqNPLuK(emAOCoMM@jz8=ZE5q=S6FlzL&Und$!n>r2h;q zmly6Xn`W$aeCu3=ZBKd+&RKp>dXu&P&EJo<-aqQk`&uZ==;igQw*Hl7^Hu*%ls=kS zEA1?LL|oAA&6~!DJ36N59QAX1d~K7i9n&lR56Oq`aZQhGI~??3?H_Y1w)H%250j!J zJ2xAK6H=yWA5Dc9_8&H&v)uQzw*w;XI9XR#N88x zOe)^Wxyi*89+z5W(&G^qF)8b^=h-j6?SJa-s~7!Sx^J3q{6zKj@qcFhXV7oE{WE&i zpW2VhJO0W2@W{TUwlr~h)UEgC_1Pb;{@bZCCskBw^F%qr!+8^%wLVUabAGIU>-f>A zyt&K#vlACqR0lL4npUTBrKGo1ZC3W_&PO|Dmb@N{%MDK+`QM@tUOv-`uxpw+y3_45_{f? zYq?6No*(+~!#!rwXMgeH`#0TEv{igmdK@i;&oNJ(=rH+R&4O!XllJa@`Q7`&e*K5b zzAt(7w)FDX{>Rz>8D?GBzo72Iua^6Q`!|1`YyT#S_3PrhFE_d_i`jc;;@ieWlfVZa zai*RrSoWf3{hZHn@Aw?-_HBI) z8Krf4WGvCC*%nYX>y^%ek1h5hirYU6{}ZtGWeqrOcYMe0D^WS=3hx+|e6~68q)D8+ z+#9c5v+>{5*}MPjtyWe&v$(S6Z{F1D_v>BP-n?<=j_T4qn@<$VWW@F_U{>2#@I(5y zU$(s9pY(^XWkbJcuCWPR?z`gVt;|55=~31?1u?GqCGVAW&Z=%bQ}#djFh==?~2#4qBQ zwyXDQ3&ZUw>o<|h%OW0yCCg}`N z2u4S<2_DjAz4m#&QCQmNgSv{!46G}!fBMhR_MgFCceb{7&Hb;RUq0Vuu{QWy%SXMr zA5Qy!l$*ZtMV-Fjo!Fx*v&!mZ_ibw8Ty*>CP2X(Q!>7~UMYPQ;yD4`e(%{hU_g()P zgr8s9$CoW<@!{%`lVU9=SDaJdoDzLHA~x=k$>)H)@a~Gp;HCm+!CC&Cf7si7KYZ_0 z{>ZmdZ11Do`hfYNJFh+1;2po|uI;X9ooN%NO*pto-DJ(EVY_fkNx;kvH$HqzU?0--9DUg>qvU^&N<2) zTb^{em4>Pay?dm>TIBY~pW%9{clCbzPyZS04|c{z+P?i)|MUmwGB*k6_9n)oH6c4c zy3Kz0($t^t$K=ERq?-M^qSrb-KO&`bYf7D$+M^KH^8C#VLY}%4Z-h7enEq|X$M-EZ ztPgaZkId?Cs*zopT_?Bm+Vg$ab)I`iuWmKmB9$ZXcZE&yo+74Vm3FOzfgc;ch#y_w ze)gmPf$w$VxgIa4S+#7?{k_I6**{~`YX3aD)Z5P%ZAsmEp!ndr;79ETW&MxI>GmI~ zll{mye_5%=GpIpjcpR;i1eE%s=-!3WPxhQ;RrD{Fb zC;RSru|3Hj9zRNNEBWoZ>6*#h{JZ9LEB)YPRPqd)%J7xjMUA;;hDKYIUJ%d>B}Csp{Lfs^~i+lLmD zoY%Q0>7H2}nrZWPo~J_1J`afmo3sBJbW7wI|E~RG`}jYDV8$;WJF$;$lT3p-w(q;L zU~6>!eMgfILdiSI{yvI6-=lf3pLcuFyFUAy^M2Wjt2{nhcKFcBN83Mky?nj+ap3Q4 zv1G};pEo4lIsWuh`J12SZ*uGR zpFhr+{NzBI#Pj~RX)5#7`&{|@mPYzJ@dvuRKl!dE>`&?T7p8gTwd;S#+r<65V0FCZ zKSSDko{G-_lWskZ`B-{)|F@#pL&xr2%ums0GIH3LvPgqNI^4+PX-R! zHkJ=oeyz~?D$kWA5c5K6O~LG`ow;dhXU+=e+^Lzc@x;Qmy4*Du%Irt%H~V*&_H*Bl zaDMz(H~;I|+O7A6YS$m0 zlF&Za)q`wqEcD^P{=b`exnheJIaapXOXyxc0T3=oQllpYsJDx8xSG z9x;qs5gNQJ&*xDZ=SLAcgwb;3fBW4%djCjm@!>el z%SW4!z5S-`9c%RZt%lu270=0^yJy@o_4GZGwek4=WSO38={^4$Qg`ck4F0-sxS+C-kmsnczX|o$wl{8%`-)2*q`#|E$jX%Zdg^!NRee&<#o(oeKOfuFsI(BX2nK>M%mW8>h=e=9f zrMgDEZh!shrTewFuMWKA`{m{LnqT+-OuAUnQoohG>;A*uzOB{j?SD*V_G!&N{Q1Z6 zBUZ6j+h+1^ayh3jcdT;pJl#v{CkS~?=lv{pkZFCU{g&h1sTckTuFI z_15{Gxwu4e)3h^Z0uHG>HCnoi7NH zkBJ|JJ|F&7-c;kg%)3ymyLA1n%w4T>pH6mExp4R7w~X64D^z+XWh&O)47kazwAXG= zm3`y(4HfHU{LXGg;t3JkCYL#L}qODoId2CK;q+^+7dN5TW#O^j47oLD%? z+BRM+&b*}psIvsXmL9FR>mI&8J8 zO@(16yQE43`(B2)MP14MQfDU26!Cf$nXF?jU^t0=!HGXd8SKEzx>AadYoF*;vUa&0 z8``K*z{kJvAp1vA*1pxLt2lY*KHYnxciYaU(Awn}9(>4Px(ZszCMt40;Pz^h&F(_$ zjA9#@)tT3?e6(}{QBs}4 zT?SeQz9{rfxs!F6jIdbXmsM~us$NXnlH7nwxYR#IB|GwyJe>s0-bv$Rxk36-H zYp+h7*O_0RUb6X>`2Lj6yu~|u-|ny6S^0UIjiyC%>%nt;cAGyOe)ylEZ|i=crB}Cz z&pLWIQZ^(i%jjq!%Z9e>$of)+3^lGNYgTSN_4t_rkB`3e&;5t*{AWhQL?Hq*_vzkS5({Px8sy_@!z{`_aKZPs8|Z5hOH;z3B~ z2kV1@HP+icm{_??yC=1E_FJ=S->zR5^vyS)lP=_1u!(bVp`wmtjNlw5cG1jRfzv}Z z_V2p>W#8`1btk5+mvrAX>G%8m-M(Kx+gLx&xT^kR;>YH&Z6EVHO0@gge@GpUx+Hcy zVn^F4>AAPMcc0FZKQY7p=A$qn<(k!vU0=BCMY64?ACqsLoMkr4_*QnE?B3m-(f(6Q zdF>KqS9`6toUzdTGlTY1CYz0)rS4zfpZ31}pXPPNt6yvN{xbya_O{No3*C9`{XOmz zx;JNu?rJ&z^lomZ%)6N}la5R(4*%o&!TiuX&5vv6ckBOW*uH3sL%+xp#){=8d%>#kGzl%3PRE~!_W|BrR!G53$h<0l_C z{Gt42R{kGV`47|WW-C<|RVKSf=WpX)u>13lLl=+kwywEU7gMwKUFd23!~8tzN2Y)L zacM%u&Mo)fOkP>pKRNqZt*)Aq*&6zcq z&zc=!G&<@+qRT#V`gO{&&t|d zi@EBjcs5KBlAiN<-p=)MKdwHg&UJoty&-2yZSm1r^&MaDM&`d~o!)Kgk*joh&DE40 z_9@Yg43W|&8v6BF&iTh>U(fr)`f$I1??YdwXvdO!vXkFN*<8GHLQE_}O7^~Xl0$Nt zyxqfdRS&LI?VaCJA}^D9Px-1|&X)fSLRkjAY=^3E-`8CpbS!`KVQI5VZ$dV5?4F@; z&Z(!wcX{;O%=hu7Q!dm_e;c)IedUTh0eN?yy5yx!KGk2w zcjDVhg~9{tu63@)*srS+8epZGW@$!#ZE1PoH$XkNui9 zTWxLS@|eG~US;afU6&VTxrxVLd9pxFQ{yqN2^?SM->ik;k7yXFMFI*=D_)`>?;QT%K*Kuh#2;zWVlW<=6AR zTlJpY(gxncB6{G{nM|HjE=Ag@WqQ{w*tYyJ|M2+X@veOWF+cL1qjQT7%z9HS`F!)$ z)Ls8B2yHYvalI*Ql6%^@l+WD4_q(>dd3Nc3>a9Q9;RgYtnwD{Pgv6cdq*z zd-dAv>`PbdGiLKj{}Z(MYMUJM(QU5kzTdUh{vWn&EKDq35qG+g$4enKGv&^Ht&JK+ zN82uVeDIg}(f%lO-PYVCKW3K37VZ&#U~3;k`;SsS! z{ke`?NcgUr{1597Wj_6|Z{bJjBZfbsat%)N^;Z1T+j${2@@kl?r{d0*omwI?lL~6j zC4Rhq_2e$Mdi*;-eYlZ8A*$6md7B^k*V(qzP8 zd$i!6=7;YM`_$v)xw68;rtj*Tuqgeq$=|@Y>)s}pXtqbFM2e}q-3s03#?B%bmhdBQ z!k;jI$@+{_irX(-ciDVNDQLnWo_jfy=9o!ryD^1#afaKIcaOeY($1SVt@?ZU<&P^oES<`Ply%J}6 zLGP=s!>d{QZ|mkQ6}T0{`8MI+p|eVrPq%-%8eqWXq0SqhczsHZ-iP)>vC_wGeVNC; z`_j8V?#cVE{W^C!D*M`{Yo0d8TXk=RN1JA{J(kIqNRw2U7|g47`|U6LbFcocy?1S8 zjlKQrf9yrq-)cWv8+e@9`o|lw9Ztr0Ie&fA<%B}{^^-CshsU=tuTZspyvzdimpb?B0J(VeibRZRK<7ml#*}l&xm`?j};kv}qq##(lmmf0=%L zet2wo`ns3jr`x}bTDxt_ufu1xv=)T#niLtkO(T>ep38uNX;S$9Mf+dv*)#t^aGCej z+h4wZjxV;0{@~wZ`kz7kkJz3aAI={+ywow%x^Tx#hQ_p{O6)wA>Up7EQicA2>s zZq{N9^JKeTn&7|a#jo~5f4L7`zYv)n#q^^-_Ud}YIVV@S>xh?sKK1OGz_}-qJZ%n1 z?9CHIl11k}@^7nS`q18#crkzJ-peM^Z?g`0-iu!O?%t;dzCyQ;+FY*ef4bS7ry`9( zBji+{*5tj@f8_qtFOB~i8#mA8t@h88`@Og4U;GmFbKY|9nTfqO%ccc*{VMcdXMg-Z z1HXTNsXT|zuQk7Fmo=|BzQ#L0kHfrICP=5ym`yssdiBKS-3b!UO>YF4J<$wHl zpQ)@(-8a8)|A+q!28+5hVC{RxXZ2fT``aJ>C|>oW`$%}z->u6UvrS(uYkb80SIzT| z{%Of<&qYRJxvN&)e7fVw_483j`;XX))ic&uPKvERGC9+ZXZn$-7RkNkBu>lBJ{+-{$7K;!)7{bH#fOzgzE{5z`VOm;9?LV>~4L*G-@hnkh;_7w({LeNSpL}lm=s&|2wTX}Xxhu{ejx)Sis(n`9%j(!{ zzO}3FR4n@u9d+ot;h_ubCJC;6Ce+4}q8b_Hd;jpPZ`SAA?n&*Re`vDZw2m5G^|^~x z_w@6W^*-4mqht^kD7~pQ!Qq_43AP>n!atNBvMv3XchvN4*d>4I7+lsCH>`JW1LJ-3uxyX5kkCEK3uKlOe4-Rl1glhZU!J&%6-Q@=X$pOo@3 z-J0}A%pXsdDz2(n{M&F(tzJ)!v)Q7@4%La=ZYEnde72b3XIA;oA?TI={K=cP)d|;K ziWl4y{AfLw^ZKI?%P*NWFaJ65nPFTS?q ze#zS>@3m{M$CqyN)Eg|-{%5`Bhw{QdK7VugkoaNio%z zV&fL>-p&@Wy?vt5f(aZFk2a)Se{VXUw<6uA*8cb}|F#m@>cC4ivFQu5XD!>j%DrSm z`}U)6^sZ03J#AvNmYcakYA6r;@@+FetoQw4S`(^PxuBsj-Ku6t=W2| zE3xaHdE+#bsq;(@PjKMk<_Rmk^@s7Yoj_*8kK3*nYnY4cRi@oKJHIz&Q{S&Q??0Ev zA1_*8s9g8CB2!`;-pz3_?(NH_`@E=6 ztrvUs&+Lcvf~=~?cTcZabA4M{tTWp)j|aPFaoh^BFcRr47reTu`da#1(aiG1b$<-6 zy(vAL?Kh)hyYJ;CTQ*)<)^zQWWQtwVSuTUfA_jR2W!cr6wtV1k*pvC;{o$&(+1bhU z4{n=nzGBk2D8(nX%--9O|0 zGc;Yi^RfSy^3i$PAJn_|WIkfAm)2XDSiI!)L&GJr_rLMfE#|uGUZnh=!AA?-6!@VTK|YYuk)kaB<4qRoNoDbud0|>ro+)EKUf z_s?BwH$A_8=}Yr{YI{GjT-KNuGU@&KU9K6$zn2&=o#J4TE2`~3zE7@Ry5_V_aeE+>KcFmfJvTC`MJs<&TCRT{_O!6fJv_H9zCC&kde^>2r58tSdZP z7JtK@&!nII#eW9=nzvfV-L5<^d^s!Nip|cvD}ss@(iIc$7z$N(dB3y%o<3`B@cz*J zy{_8(cfNRE@_z5hvaGwZ|6XrT{l4yY+sC~5Sreme z)^3T3t!;b7_BpxB!O2_gNyU}b$4{KPzBhc8Ncj+spHjA`0hPRBx=WMl;y4W`!aL%t??kqRII!~xJW*R+;UFG&HM&v|5`0u^182Q## zv%bCeHp)_|bJ^DZl8sOQT{a8|?!B*ea7**2iO0QOt+&a3_}A`B?xTQASL z?fWr=Q}p7rNVQa& zhD=$Zx8N(oSB3|5p}O|3fBoClc=YA3leF`X6~_rn_74j3hguiMU-xF8{`Jo1+fS}8=?&xjs-3HF$M@-p4>6|;>>|FUhE9v! zz2{Ay#qXUGNBvE=EtTDS`HuI>6QAPN_!|9uqLajGJxybVjQEM490zSCF^b*~bdbx> z&0eLG_Riz0;!6MJk(!0idN!SLY3P}-cFW4+F=Dniu53RXk)8c8uQYIl#5&iLf8S5P z{yonB!}Db}`)=MnTO})U;KA1F=+~lqnk-Gzw)VX*ag=)GHbElb?Pr0R4ELhhp9K_g z9MjGZHg@?X`uFJ8BO9aN86TTCb4l+l8#b+G9mi917ysCNX_uBJr99*Z-W=UF^?HmwUh z^3FI5c@^bqcEA4&6CV9%xV~rpFQKrn3>D>X`ybxc7q5}qa^b7SpSSkGAI)0R3*~(; zo?Ll*G5?8KKbPGJL6f& z_GB%+(DdIYPwU^77qwHlRip8tJNtT7+~T#&mD!2BZBrD@h0hjRISQ?xt5Vprr+d=5 z1w3=E@-zL(xbP=aZn5YU@i^hDU*7tfRG*bU!pjnF7`w+?r;u4@le=QY-N>5lInA#) zgjqkT-nHL7+28K`exv=fC*Rz6w)W5Ux>%|DyTA7pZ2DMxq>8<1(Z}>78HzvB%hzXl zs(0AfKIUHC@yK<1*yb&>BX0Aov$<@4Ciyl?vB{n(zvZ`-tOR?iyCm)&u; zxgOl{i0AV?mv^5$l{QWcS^nO5M{)Nv->OebpD&wle!u+rjcT>Wd z)sJ>pSI_5o`Tg6qdD0*G_FFnWJTLHCY(kc%P`l2@<06xF-f*oyDLqeGwJ}KR?SF=2 z@$J9D_Ug4)Kc3GU`8v>N+NaFzQhRsBELrE^VmSSdWVZ&hq@~1b*Y%6O{NDfYXm$SU zzqQAcT@&Ox?8M~sK`yDxs*r{dgy2IaE9JDyt{({BH;{;mAO_hJ?6 z&U>_bU(3lc)m*RpWp3R!iOK#~Rk!LsU!${WCfkKc0x7-B9XD1cXx7IUoDW~Lb3a#& z*N6Uo)g9f3?X)i1xT@|_nXEQ>#o?0Vu-`q0%q-a1S-!5DGm9If*JZt&P_d5*iFxXt?~;l|;!U2Pj^s?~XVz~1 zDq2`CVW;zAKFc1)l|OdJd#`+x6R=_H<;OxXcg#3%3W)`rZ8&?9fi=m4Vb-Zdepb8d zqw1_ymu_6EcIE2S{|xJ2R*BfCew6mQv`6#9tMf75{rs=)DK}aB+WYJcos>RP_sm+! zj3|$Tg2&#@{qb1xyy)lqZ@Ft9ZTz_Z;ICt=KYWhgzv1$p_+^n5Ro~9N_c7b#lw3Y{ zYTU=v$fc9H%8#uQx?7jC@%;5&aSP?vE{lqHTC*|c&}5|@j7^)GjQjpvpZ#O{xAqG* z`j5SP*&cge)6><-d-LV5fBg0~<>fE8hey|)=Q)!il$UR$7i1p#Ik;LcdsXfpuh~&6 z-ktd6fs*#XXiM&ac)q^>qYndPGH`qk3c3k>mp5=#on^&HTP1e! zyg$WjD(d`&*7vW!>~B6jPW{rK$kJ6eJ?_hZf_gIW%|wPe!Ktvsy8NY%~kK`?=F7)*OjM!(-oZa+zzFj)?+tkwJ zO1-)DOJ8Ml7N-`=nY${um#Fisk(8aXT9c(YO6`4+cV+z3I`6;N`g5=T;_{5*e6wG= zIQsqW>#=ip%`5!vU6b*l{P0|1_bpf6*O+HN(ie8M?dvUmRJm~b<|egJbrh7qu%>BJbV}WJ>iI5 zP^90%iRZn3u|MWNy4%0mPBOMS;6mxzeE&#MJ!!WtakEts`ZLvM>m=w!l&Jh-j||H-p_>-Os>>I~ODKjbg&`uMDN`1WlVw?^j0 z-RxV);d7Ry*zbd{>TWGx`?Df=aNzD&mDimu~_*HeshFE{{dZZl#+T=N^p5U3*Y`;$L3pd%^P~bsn{?}NUcFF> zFE3x_+Dq4Ld*NJjeNxpw-n;Q1_Mfj?*8A$zw{UJ|);Y%xnc2>kkw3RjW7l zVNWa%y74duX~nMjxl6VF+CTqoulEP7`{KR2?62;#E!Wd;{LFvYfB58|V(~lWkkmn=KvnzI)4j_M?l}?Obs*S#5bG-^`4Z^Y7m+VNc0!yt-}vsqNa*F(3N+ALTl` z?7OmN+NbBPyUlM^@7|_3=g7y(*^Y;0Cs{~09(&UxBXx7-`?~)OZS&XuXJGKU=AB(< z^>5lAr!I{JiQsNy@{i3&gesyPc3xR@$yBx~qO9femKm44XLcI|2v{q9KdEzyfn}0m z%=VT#vln&hAI^7{cHj1OKXm@#%q7iHm%r|K?A^`oF)exjsY>O9GozOUaG-hBCd;fcCSlN}%Wx*xb_Jv-{dzF9T)mBm6)KMn`x+MRlFCwfbI&Lc~% zGYdTfH*RNoeeBKk=7=Bv84mAb{5!`+{eiwjmYu|h=iN5x=_`-sRz3_}k&Qm|Yu@fh{~1JHL_IOfEmy8+I=Z0R)mnF&5f9h2v+DCM zDfC7KZtax(#sBF0aqa%*4|nd}{bT(xZvUcacj?osUtG#;6FB!!%uwe0MxXOLrz9p@ z>M)%3ezd=@>>lSw+ii2#RCph`w#(-Fv|W`S4YxJCym%)@Bw}L6>~jixIweax79=+v z+7eW}YE|j}`>V?SGaQ(7eO>puou5`*E_gBh?(^@qdt-kmXTRAmV4v|<+9N>a(jV^S z+qPF8bG(u_^}&|lcN4moAI~>3ei?7`hv!<2GkZ@BbG3bsP2h^zmlth4x^L;a*x2Zk z-}YJuRrefKY4wY{#m3M-VfpFp#d*J8-F~cp`1jfR-(2^P+eyuj{_VNx-`W1~?b-9U zFdaDhwEgaWjsFaK#-Fy!NbOzr$K%7a&5vf^=ebd_*md7W)k)KKU3|NhBW2HaW=VaH zq*rY20ajfdYlY%J{Mz=Pfo=LCSMRs+tF|wj7Tzn*{d-mE!E-K#sb#B6Zn4%H*G=|JF4*=)=;7i-X}Pm~ z{Lh20PmlRiwc+wV*&oM`eSRrEU5c-9MNGs2oj6N9Q%06WoU)>)l>YtkXfS@za(Cix zMqkatH-FlnUw&_ktFHRFU0Z+FAISOimhbwZ>+u`6AD&?o{8;oyb?)+IGcLa__GC%B z9<*g@vYPa@XZ~Y9fAhKKN4btRhL6*lPF*!U zA$E76P|G~C#!s71%yeb^v-bpB_I;Cn$@5DpghQ{_Ka8kocbI2Aed->Y&gG_WZ|`RM ztbg;}Jo%Y(JJY8|KTWC@zVOHD9de=pAWy=bK>dh7L7?Uz69Ur#HT&-2g7B%Z}a`O*H4a_*&Z zuQod;sG9{H(px&~!NF{W7N(0o9~}Gb&;6{(LGxu_tB>r;nJ3?U zG`xJ4@0Mtjzk0s6_0ojBrDa+syy;At%yD0Tjn`G#U$H+kF2^s~dfR=`W-alydGo6> z*B_tmd-vM9=V$kettpMQkNtSR{i|)#z5B;zo%&J#Ti$2h1@Fq$dkSu==x1x@tk}NW zdu4pqwhK>ZXIR?xKiYDojz&*7n$|8b&L7* zFz>k^owcowJ?}~E$z!)alv$IwBI{Oot7-RHcJF_;uf2V%I_^n;5mR&lTboFb$N__0Qw9FRIO{z~OtXeRK^}uP+rC}33tUoTR zek7{BPglRBH~G->gRf<7wd}eRu*in5U+Jv;bnUsblaoKQ=FJH+-mblSTYvoPU$63| zKWl5hUHIrf!=|FFTmHxXGj#4MTlSe7qeMeeANY%^Fc{Q^Xlbmzh8TP^g{M6zPZ0`>n>FN+x;k(??1!gU2BgXdTFOHGimdt ztNHskK5UD8%WUX>@6M8gWyYz^54Hzi`EiU}U$jQ;!}=rZIkIP!EnW6nSGhUJN4B+@xKdmYpOqdwTXTh&-!Ea5d*uWh0A)) zIC2&C@bdKaE9)LTlCi?8aOdHg!&-a}-EM!E*J!RkzQ8*gB@ny&*Z7KMJ(=AX^ik#c6|5YD4Xk>^k;-~O?)mr zkR&djt_23(>Xe0lIKbFz18t=ZhlPq&u}qsW4^F_aNk#5{HXNKACV7NuXoz1WUS6>Ii6WCy=cdaMhSyI zCzwC3-*zrD@zv9>(-ieW#r{e~nLe-WKl`I>pH^FT`!vCQ}(~H>}ofJJRER`qOizuB=Pn&7b){uZXPu*#Bt#F3vx8->%h{ zE-n9V{=+)wocO_bZkyQRNB6tX_VL=z^}a0M!#?d@yY=XjkGD^hSu-7- z%g7P2QhmeONdK03+Wt4cAF7j(@`#Q1``Ef{sqTUuEgWkeU$46qVfJ=Mbb*A1<>L&= zM_X#je>>j2^<(?Ni52-?*GfH9KH4pR_}1BD^Y(4#;+LXto_TyaGbO}@Z`JQx9zTNT zJHM{y|5LiW#`DA6=^Z9pcC=l4a8f$@?(wJRSvH=y=q=JciFrxKs{)TxbA${s`YvQ= zd@}tMSor%=_+O@Yk;!*I$3J^oe{^?MrSZ9acKjl772Oa2GkjxjG~F)}opI|))X}@w z=9VsZl{&T|#wD5Q*()`pG#C5wJpOwpj#io<;cu8{^3iSWV_mQ7haR83a%{Km^4Y~X zD*VOyOLZ9L8#H)Foim=puRn<~sP4d`j=HzocHRH@bpO9eO>tk>U%KCJ{+~hR{8SVB z#(5f-|Eb40u0Hta^jo=jDbGi{bKQM+UKI9`Uiw-2bI-)?4=>tJrZ}BCwS!0GEzgJa zL-Ep?r$4+uEVaHp+W%smTDhIh>YqHtmcHM)Z}%^=+ws=>FZ{Ir=K0}y$Jf2e z56vHdwrtkc%d@RXh?i>XHH(ZTh!6LSs?a z8PNVbf@7lVnf|V<x!^H?Tcpkp-U zjMpdaRh%-9c^tpr`aHka>VB_{{=?gA%MV9wZ{1RB?zL&&w}0_Mk>?9!l-`8|i(KS5 z$hhfwoy8%kQ_J6|8k$AZ|haE&I%V;26?=&3ZMNa^yBmXf3g*e-S#*6^JdA3UD>)@R*sh^>P~%j zOy!%EDl6@0KYeC%QnUZ0!f|1t*SA;IoEEIC@x6UIY|EFt`_Gf7&FbG6vs>ez=j+$` z)2E(_qUEuQ*PMU5Rdn4yHmzLz?86^>RBIn+{t*S>?U8wckZ+zaRs zl{5#cE6PPVSAU!#8op~_kLT-y6X17U!2PuCd(e){$A)_O`;!9 zMegqDWdhze-EL?6XOJtI?_0S(@7%G?Kc3gET>fHmbG=^p_xSX+{x@GAk~8>_-l=}+ zw)?d&{O!}CGMkS3el$HcyV!Q?@wpSGO}e?+I=bcV{4E=6-W;#`UVS@s-=-bgl0wsZ z7lTi)b#uj!0x}oer<1WMVwhJcdex9_9Sz*qWKkhC*Inpzwf$UFKT1H#&gB#<*~1% z?3UcBnf}WqUv%l0T%K@&7+2}do7mePDFtj6wUq9Ux>{@*@_xPf`knQEw}0IF_fcxf z%KLfq=6&54{ZD*bTl`z!hm1o)N%YHbH|4NPTnpdwy;?p+l+T0QKwq(MV{$d-4 z^{2w0+$nOGcv9JZe1BV=$&Zz5@4lA*&|J~=NZ0$&>E$oO#AZF6J2}(*S!9&|Thr+E zdhhN$yYn;Q&hbwcpE`5qC;VMvQ~c~8@-A&^Ae)bCNYr~f4XkY0DMn_nimqwhBw09kVbw zJ$=^{^~q09O0P`a_1Wg6r$=y!UHG}m^_lDn^6^ny-c8wMoBOG%GJEar7iWG3eXm+; zFZ%t(n)!3DJ87Rztjx6CEwoz8B&BbGVAs$52kZyd3;pmv7RR0&emLlb!4Gry9hcXA zjVv|U*&V&Z;C9qu-7VhQGm<~)HXD?5yiYLOu2Z*k=MHc0%Jd13EH3PpcVP3adHBKn z+nj55%0JE@zGk)hMQ}vLV!zpeo3~tAG*fR&S;T`KUH4wzbBmniojv2py>5vhuD6zK z%jBh>zFmLk?XTNQ|GWv=CtCkH``>iy`mQ+HkKxDUo99`7`2E80M|p&~(cvYkSHBb= zubbdCDf-NrhM(S=J|YQ`j8DR6X58QWz3We;e6QYH-TH?S(XaKF-gaG5(J%GdKR;Xd z=#kfjPww#?H`(sDJFMnsW|N57#dr55>l99Y*!-}+!zPe*)+_Tc0p{F{_Z!pZ-c0U{ zY%5rLx5(Xb*5~-P`Qm%BKg!m`KD=uk{-JH*qga*5JGZjGZ4>ui=9_Oa+3ok*o@f3> zmb!VuObOClMLQK#e@4HLtv|nX7w;cC-~F$jUS9iaGxve{o9}m*^0&!pU9ze8W&3XR ze+EvwwQFArMrFRv6+WqQX^+w7nd?+KW9P=!EboiB@>a(~<+y#LP160%_T7JEs~5(( zuKdyUAk!?ebsepK(v#diZC{JHti!)+bNh{daMm-DMle6|an5R@r{M zSUUT!-s_kR8+K0i+i&(-vd3*vP_9egCzs2om5*{C_}!h7e5p?RkN9v==D zR*4c;TWm4!?8JQC#M0`H%V!;T#_2vx_2WFq=Epey z=jF>!zWw>GB+lyNuOCdG56()dNQurhtzj@qE>}ppYr6?0;li|I2#6|D?FYu3pFb@UXunZ;PK-*SE(D*O>p7 z__6+=RD1jWhuR1Y3-P}DH`7It>cTUUAov}?s(oHmC((QuEyT#w8ehfW+ zKv(`zUawtmu7LLr{hZ_v-xjZ3`}FRfO-I5cbyMOuA9~B3lo>kJd0p+dS>J-Dq+VR| zFK({f`E|P_kAL6T+#mAxKSO@i*GsRL&3aL_{ir_2YdPzW{S78pHs5_{&Fi^!Z`_Mh z4JOl0@y?OvymNEzw3r5m-*%iVJss`m}aK)djH%NTi?dXUdc1<=YKi zW|P!*mX#_gRpIk0)*rXi{qX#FK66EFwCm(Ua?0K(Zt`w9_Rlh5yU{zLpNkBZJjnfY z@_3=U_HvW?Zuigce=uqLG-lwx9l$_x)w*lZw6D-~4{eQRB4z@!^fDd!wx9 z+r->{C&({q`&;Jy`9IYLlVvM!f8Q7NexGE8`!PAuAG?eBW-fX?OT9s5e$bx1+pe!G zxjj2<=e&Hw)XVqSvX_3AT~Z$1qruzC(6#fCes`SVhxNzT^ZoF0-E?hBZ_&%gV$~HdxF65b1`F^EMZ?-0zDVu0${T#u|Q%GyHlmurPc;@Wf|0(xBgRaf>uq~@*oy&{f zZNJmD?)ClCCttrSeJIEMVd}B&56(yJc(2t67d~dW{I%Cf{!JuH;JUThrjKKeecbbA zqQ$4@F;5TNmtLvR_0CS@PyF^DyOT<7jStVdHh;lc`Ps>9vgA`g>0d64y|^ZLPv>Rp zRFB)TySm=3%M-eEEApvDpNih`3yhP!%DVEZqksLG^>eD&yD8=Szw8%2@Q?pT@DX`l z8{?1Mft!ESu6S|FZgI8DE%C#@wQsBM+#0W{JaxJ`(=~a+?;gpK+ZvWM@7#9vKSPtz zkLL&EH?uV^_xJauy`-NX4Y z?w$P2{|vrAS|7Pr-<}(HC#!6mfs*!yU6akq0vp(#ADM83^U21OriZs5d*4}JziHmy zk6aoxy2+3Jvc;yZU%PDU&Ur@{Z``r7J0(la{mwln-lHE+%kC9hyPaKC@1(tU=8yQ; z{|t(*#YMMQmwn#1zj)hh`y&_rE~%5w-e>+Xyq9z8Sb4GJ!lT2uJ`#&KoS!kV+uY4_%J=j*qD~nIcyKMcM zUH417GVU*b_WD`uEj5Xc_e<9lUHZz;lwBuvu_kx1`_dcP+pkxZOwe}CHetP~xO-B7 z%qhwG@;S`A6CdrHzUIErtMDlEcGFm=Qzu@9Z`(HMJnN(#&V@<~Q+o_w?r1Q69})WW z$#k=8t==8g*%ZZza#We9TVrWB;-KmJct#3*M@+zLvf(Qs3mSm*Sd(x?6%LDNb;- zk)D+5lE$rYXruF-Sx@(;o$sG@{^;AhU#C6?e>mQ`=FufxQ^`_!uERQkncIUlw_jez zd{*z}1dXJkcKIsP6X!Qne0v?S@x!#Gr3+)DBC`x!(jwoNy!^^>HGKMi2KUBGp3xZ= zE(xB;j4$od+BWIg%l`}~&Hjb&pStaPPNJ{&R(buus;WPt&;5Q~d|;2@1Amzc>xb=K zCHitdm`*=hx@`NSUcYy~((c)ugLl{KBhFE?9WGof`8`+9Hm$Hxrs>*)E#F>9pSia#V%0(ACp}?ejlaDs z_SLi1=zUaMzwBk%ton`|o#j{Zk~i)6ZG6_`wwlMY&s|p4AA0X~s!qN5(RJ&S zd!bu6baHau-TGv<U5xH^Dz`o__a?lm0-JoR^ml#w z&(Qp7L!hfjn@Aw!3~Csi6zFRDB;r_XQ-4sQ1K*l_kso2lkKPKoCjMc~g%bv$$6T{& zSIPf;EC2V4#_tgO&$sMt=rL{m<~GHBiC9{`r>r?-$m)3(ia~EC_jhb;_b$ zX6My8C#T-vTDMKCB~ksV%KSh46Lm$!Z!P@KFz@1jhWk$aTb=(i*e?Fhu&=dp`37%S zSC}!2-v<0=IG^#K;r&7fp^JeS4h~@15)z-){;%5oKSM2--BtcS-}?Xk5@Ez;Is-M# z7P&GpMbeGO*72+r__R^Hw}iWQ>zK~OK z*XP}MJzwA-PmR%!`Hw2T%#FM3+x6(=9>WDc-bYz&ed5|v=$E>~yYq(Iy9bMA&RNHK zfIU~^-IHC>9}4T|&YRoz&3o_ld$az`j+$!{zq!0~js3>2(){vKu8(}JkKCHxS$ET} zDP`--cMm72H7*d>I%Fy&BdGRxOA){6D|xPe^2-nFvqrs$&#w)7;$K)iQgibHTK~bL%984E)O>ffzb` z%Af2X!H@QL&YIu$nFTl9Ou69ZpJ*#UDFRu>KDEpCC$k*>yFv30Ku#UhfZ%Y_+%ZeF=b6N&vWiA zlP6`jUi~Lj@$B>C^grrm_e?IRTvqI}j+Rvp{cz>G(aQ@0`!fW5rhimAX~=H0#m9kb z&kBX#qBU=SJ7vnLeAwRe$MmtRc+j3pUly;)-02(8m_F&x*5CgbWcS62ZTxNJ{3yO>+dlKFY9Bsq z-?Xdm8^@Rb47|tlSDx3&+@`wg3eTF$Eul?%Z53JyGdZW}NnmVt$n$!#4b#vQPBnvbZ?cl@<4cE~i{wZ}NKn=2&0V&s)!) zI&S9J`J^a`e?ox8!5~Jt$-niptIodbeyl4UQS17jVS|msH?ftg<7;=lPpq-~KNyd8g)@H9ztf zj`-tzMOjQv{L+`&wCwD%-7MSZo9;5`ihyPz^@v)3d~o5W^!-sQ=idw1=Wb*9M;$>!)I3xrfMA|3he zY_k+Saqkc7$ED92A4j+T=+Dl4d|pI#?vi_oTQ1Dg&|UNP6N_PV_u5xyRAw7-9%h>| z^>!=svcF4fLRVF2KgjPctrdH_<*%Q1N|*FihOHYXg>;-T;yA^wV`MmWz10uL<*%jY zciO4e*j!y-6TU1n>|0ds*IoB3y1dW&om^HuWn$0|k58-7ipD*dd{*HGx_T{QQ`D~NRAD-pha`m#n z(Wk4OCf$kIbZTevGj>^bS*ZUCnyBN6Ovy!p3>mAGIIp_*4JesN(R^Z5!9Nt$cVVYg(YZX`b%8 z+&#Nz*j$;WaZ~o#nH8*&D$=o68?Ej8{LjW;H2quow)QQTm6P4JPxaUD-rm2ag2zc<+@pa=#?5eE8X_*0S;T1p5jD4|R)!_t%9#>TBAk@SYuEi|8&|ci4bjf!Yby-by?L^&O;M(Lt;$~e{StSdbsEn9nI6V|pkK5i`{A?s z5B9TGygTl6@!F~E=%1$Y{JL*GpEllowouUXk;mLw?uuSkd0&g5f##~#@S+3HvS zL_hY;x_roL>A8jJ5AKG3dAT-!;Z6bf(qhX=@>^8=Wj0JXJyVjqBjvoTMSoeTa&T%&vDt?CR)crgBSyjOk-$ptK#~2|CX@FNA=b{=?~XSyx2AM+4QBd$-eDjSMzLw zYjoEpR?m2t5$?%x?K7t{i;>WpwQBDlo{e=?ji2TpuKVZq_Pc+ho&;xJde*h+>Fv|i zRsZajH(ak1s@Q%^&h+B@(*F!HQ85+KZrAL$c9=KmA;{hxv7PQUQqH81wxtk?fmci#F<+4TvfpZ)IsXUMsGKYw~(c}ivL z`kp@yb(VTdO#Mab)6DyqUhMBkyAqSZpQD&@Y}z)tbYae8=j~5$#d?1JR2FvrQ2s6N zN9J8Mp$|Puf9%ow<+h>ZKZB92*`a6BE#lM4Cm(OGZjZmabt1b~d1uAV&CiZWUG9!H z7yq1T`uS@8;mEz}OPyj6*}dM!dKBp{~4J6=~hfX zwol=TNx$UD_MV-^Stg{nZ0e(ZJ*ke|Eu==p?tmy>7)IH zmDvwh&v_YMWw*9``MHTsDc1+$*_fq!?@Ts!7b%Kc=_#5T_$*)XX5>w758s#{-VZx#(tm4JD(3dyd#jhfn60pL zdtX3GYwpu|!IF~{pHBZQu~N0+#p3=SPk-zDNPnCnwe`&QU*}raXBphO?Y$!N*R_Q_ z`Xa)W9h;&K7s~`Cmo$jQtgJoogzNU-Szp%%$Nf-z7(KT*T|S;j0#X-7{(b>@4#yUoO|Z>ClVHU8^g;eCgf&5C7~I-&6k~`N%%Ih(GrB zJ#W|kldGJTuQ%yd$mCZ=y1U&T8L#jxDw2J~I&)#BrM+F{@*1DN^K;ED7SpY z-Iw31QmUq$PBigc9(&3>noV$XeyH^fA+sLAn7r=C{}}}T=>1r`I_gK)t55$K{I2Sy zc&z)%J^NyYNUY5Hr+ak2h4xu4XgAcqIp=NK%*c&_Vs{qpy8HJ(!-tZee{KfFY|LLe z{qCwirax;zQZ)ho?`bHC+zhIPXwq?(Lk8Nyko{S-zR!NPpF>HS@)8-p{fVxwh8iWE8Jg zwdSI&Q&SA5OkJGNBT=*?iO=2Z+|s?XedFDi7kkwo@wLv{`lj;uq4iBx+na2n<|$s^ z>0twzvin=Y|ES1+SZDvp@$dTWzr9ZX z4*tzt|E>R9`7PnM@#&xbGgLmStloe1&yREGXN&F)FWb5+zi!s)@R!xwKks^x?5i8P zHg9g7toYissV|Cexxd-DEK4x%e#PzBt2U00s?WydW`2$3`uE)-LT%DL?W75+YfqH+ zICJUAxvM`@nRDq!{I}>!Hu@jbeSg%-ExLZIx3zI|_YtX6uhiB(>Z91QZ)E}ZUTySW6(8f7tsL|7x}f#RUDkU~ zYTa5=d~&Z7TW3mfS8MJzU*{)tV%NWzI(Pl^3v0JVZF%}R^L760{k~spNjw15^uc~1 zo7BI1AIxw2XR%4(@apJ_?fzRm*3JvNrKh*=a4B=;#-j>~s*{UsvO*s}NzJ~uYHL>f zqx#y#>+k08n5(zfKC*86??YW0D=}6H9$B_0TjB7E#20RvR{~h#Q``Sl9RJTy(`9#> z|If$%e?LWaMHt0zCjMubXZWAtzEc0D!~YpD5+_D@vq^}`<}-o0D0Yp2;q_a`SdE#A^NJtN#T&qJp8ShlTx4gbgc z2g?m^yt)U zc4^VPOD|pvE!q=y>1eUchApceEOvjU#KzmI<>&FruI!`vvH7AE?GLXlRGYiHqCW7# zTASGP#8p4uA2u(3u`6%!@~tb&C%+Bkp1%FIMjNMk=NyKLjGr2d-j_bxFE;i5{qN;B z&DKo*`bW5`_Wp<4^PT>b$ua#qelhy^#;l1^>lRAg$g=XOYVu7lR{RvY?`jImw1a!@ z&YpSE?~hP%*VYyP&i`Y+vPD+fEm!cZj!AcB&YJ6f`*&_GZHlSfXfxr&dL@q#@o*iP zh6v%a6}itAu89A4N=|j}?vG+KAJuA|kKej`^S(=^C%2w>yXD9F+|<(Qmr3qBJ~f^3 zX%P}&i0qmiR-ZAyGk?{O*$2g=>bp&5=eJq)O|LAN_3Fp1dj1=+@j4TAy#FrF7TMjk zaKakbsWI=){hggL$un~8{eQPh>us;6ec8K}W82>+-~KaLXFlEhDe9-Z&yVE8Rpre! zmP=n)*>1n_Wv<>eZEf$`H~&HmSGdHU@s`nGR+;=N>)_QlY-@H!?%ggm+ab?&;?$bQ z0tw3;W}drng!jfV57AFY|1)$&>i?6E{1g47U9I!s%^GFdGf~fa*TrO-MQvNXhdsGe zvUTpB^}3gox7g2ko_M=)^P|1&hphD9CO?c#j{PXMa>=r_53l*!W$zb>yLqs(VtUza z1((m4%ypIs=ZR-e@^rH_T{>r#s`~Cpzjy8zd%V2ldabg@!7bZ7|NhzE6m)&F|Hdz8 zMFImrYZHhXQ3Q=ThEM-f{^9-Mc(F_?zo{SfV`G&LJ$f5<>h2%^!bNNE9($@Wf9E`X z%Vrj><=4B1z-pWnZoe>6_y!}fz`|0!MlQmghU*WX`^cjcX!w1_K9#y z(An*adgf_b$Hwpab8P+E{)p)h``g#WA1aEuefQq^hnYUVLXSRMTx@gEBQeAKr1FWV zq`(`(nsqy0H^+!~?9u#?9{J;7e*W6*Y2tEL*&X?hKHRlM;Zzbo)mqs zf6M;PKdNggP9Ltaku#h0_{gmCjxRIQepD{w4lG@HVtb&~nVYTaLfl$BVr7$W3x*x@ z6j}c^Y1j78ce7t@`D1oxdVae4(;wOS)BZ)zYt6dPmcP$p!anVbU-dgDc&krec>d9} zo%1fmYjjG<+CEQd^4BhV)~A}gbW?$E$u9i^`9)AOF)72LdB{GaIO>gbcUbN@3)FL}P?mEOtyA?x?g_RhcX>-NEV z_8Q#>vPqA_F6FL&a%EZboBdf=O{>qkK58y|ucch9GcTliLz(+lPv$<>vaO%~GrU>< zSatWua~mK2wDHib0>{OXeU&yYO7dGqdWZC>esi}Q~N z$2dPbo_4`=)1&B?$3;g=7Ct%0@ae`R29+}V&G&mu_A}R*|Cs-{Ha}+j?1SBpxaa!m z)?K@@bnO-G?40T51;^$7Gu%B{G@Z{(jJ;4{&XY@CUzP4n{F8h7=jYOYal3BhdEeZ+ zyT0`2e$l+SzcT9`emsA;zOCwg^P7_T&Zs+8>g`KrDmH2U@hx2JUaZcToZeN@bGO4y z_XN8n!*%<|@-6Wkxizs5mwsgL_-ZSEG&piy?~>_18dpWU65r6t-7#hDBhP}{=RB=% z%t@KUlU8?op7w`z-M_6~mrFfcyX57idrLf**)6px*mFrTIcdXtg=sf96)O)|J_vJv zYxyz%;nuC~hjOzXABz*~vfeM6IsMzLmRWN*2uG?cIcZWP_H$v7+cWlz6DODKvsix3 z(_Z`K_nZ4CO!_X}cS?8twEd-*e$LzbXg+_1@T1b3AC|4G&R>~T<w_lK-mEnF&yd=y+7Q{*6Y{K z{by+X7u%{`V{26v8+|#t=F9bUe=FS&z2~S=y|^cLz2}4Ky?XDDlv%&cU-Q;<+1^~; zl*wl*g6=LY-5fR{&+ypBw!}%CqOZhPYu}8~;SnpA=D6`Kr=Z^I$Kr31k$QV7KP) zU+w7hyO^^F1NTQ0o&jGdQUwfMs#UPdCP+ z>2(+6)HeQ@``fz4xB1Xy@3q@zea@_!x_60DpA-$vR(I|m{963bb7_D z*v{~bu6y1ypRh)x3m&`e=gF*cd5`CZ^9SMtKKNMae%Zb&t+{q>gs1iCQrp}2VmKE? zS1g~EZ}?|h#>0M|1FT##1CLh7AGVeE{BiWR;t#v$hkiYsx3qY{OY<_TnV0gFcTTcM zak_lVgsXq=laP)aex3$_FxT_8=A>@xe)ImkcHY|0;bH$78ZZ5|KIC=Lf9*g2^?&5{ z)$aXF!oi(ka=RbBRX_MKR4vk~IGPdnMydsRyOG%Rj9>)w(D%xBl4a zFWVo8@11?`e(U|$1&g{gBtSKFn0}9q^70?A4{xthTlM{9VhPjfzq41|iSThyc;K6_ zI#7oT3czd61!=a2l-A945W1#Z9i6`8+L#ZLQbP<5%+ ziloS+=VvM1>SBrh##69ZZNr}054#_N90>%Zqe&(5pf zd;G9HPwdHu>Q*6z{ah!Xz1VpB#7Fmxnzjapj)M^#cc*AGu@$j7n4bD^{9*WEOPk!K zukWc(PFq{Q(&?I6)wKOZ)5^myu1hk!vE|V{&nfOZ3nlX>+qz}eNmlg9x5x9^2~{K? zGe4XDXl=01mP?PCQpzjeD{cO)>g1pxu+hUaiT(6-i}jp;7v}E?vorr;A91x#F8lj8 zsq*eGr}g>cZr**_cFn^l@AIbB_jFd9Y)bU|an&h${n=@jFQ(*n1o-{?y8g4*zul$# zV{;P4tMqcC@9y0zT=wPu>Ef$vP3#-R>m~19yB~jH`HyC~1-}|UT&Yt}PuV9p>AU>R zy83DAoYRadt&i)@`+6&b$kRC8J8%8icKo2c?4nCGoFC=%MdN<79}VYyWY5g8c3G{V zl-TEW2r;bXOF`Q)E^Zh~E=0_4X$q%G&{z-h)>vC_d=sMl6x<#*E-Sgl2 z^48=#X__ipMc*zMu++5L#qOS6X1IB|_~pCt=3DnncmL18s;j=_`IFgSy?+1Po%{S* zZSI5p&HKbY@NFzyooROcQ&zC!xA?7Dv-Un)^wDv5`BcH3)BLq;Z&)keTGDu&X?N7m zddjz7`{f>+HSL%E*|(p+NQJL>^nKs{qWe!Oe!ZyY&iyA46Lr6$&)5FIuCmo77hjo6 z9ogj+ab~+wTbK5VTVfHHoK>=%CpI>mVNJ}oVt@1X;r#Y0&b=SoCO_QI9e3wW?8{$A z;wG$&yJin-lVMm7g=)YV|k$Q?GD-@cm8xgL=V=y}8rwdtZL5bC3J7 z)aIh0-pN>|pnv9!1Itx?&Wh9CAK2eqe$+2q5gz~HKSQ^TP-tnhZF%cl8EH_o%Zn{-R{XLY>My?i6{$i(`4Ng=0G zJCdZ!in8q=n>X*1sCXRzVc+b>XX||J!?P#cQkyT{z3xk8o*r{*lt#kN?z2<<@8%fm z`Ei_Fn=A1n`&;FY_=mC5!B^@|E|2`%?b7zj$u$C zu|M7)jN{(@Bk7;gN4DmLuk1fe&z!DxYwi=X&$ZJmJ*PSDZTxg);?8|9PMi%{`!;Fv zu41o$xl8M#BfoF$?EO2tZ1dB+dtUGEP2cs^I=(ZW^PkLXJB1JHHa>bA9(?($w&$Y_ z7gg*GuarEvuCs2{#wFz*oBUp#Tk_zlPm+ZUyAndCuxB2vdHmb^)jidHKPEn`Eqi{T>fX+Wf31();;ECo zvSs&O!#B?(^^_)MhIm#?;!MrmsF`tywcq-{#MQ}1y;s&S|5%(_Hhq8kWz&`EzjY_< z>9|v=_BFLSJH0x`+hkL$@!uJ3wJUz>iD*QHyw^5g{A|*fvhMw>_wBZ>ik!S9KezVE z-_Tq2D%0jw$vbAm^ZQNyXnfSpaCYdtPqAsY&YhAL;yRkn!+4J2je#|TN{b*Ht+k*XFzGa7Q zNuOS=dhW-tFu4}Fxck(|&eC1lxvw_+-x9#_?6SMr!(DYCCNBr`b?CJOUw^qjs#v3o4w|qsv zp!d31#s0DrD)TazPGowQ;L~#A$kUi)3kjL<^Yb@7y*qF3$2?o-!?U(d+q~drh1PxlUA1OsqTLhKZ!t%` z{LFCN_@KlN-rYPl2dBpL{Mz3CPa#WQppMsW^@nZOejm+V6>KD#T@Wn;}F_oOn>1UJ`m zIo*99>O0=#)|jt*H7k30hw^_0=grk$+p;ehJY0KfLs9tCgo|@l9%PY``l)@$>OVt! zjp1*pn&fTQzj*v%UUKu*KGjW^6+Vid?NXTgWT&8`U|*&9*`S&|%%_<*?D*NA{8&O~ zPw0o)hxQBYQCydKYufB(nPyC%-d*}--R<4Bb*4(I3Rmye#5ue7bQ|Sx+`bfG7V_eK z?cQU5RmJY#P5rdt?&C%GK6_s?^8UEHZuPZ$r&m@eAB>aP`+=?3=6jgo?X1gpUz!zp ztDLN^OPuGH`E-J{rvk6Zg56E$xNDAtFAYC?kK~QqCU4$qf0-QL68oP)x~8?XH1GbJ zEr0C3VILn{Q!n~zdzD$wbu$C^;?n2NQm%>zYA!VaJ}GoX;ZNMfl||t@t^)Lh9^O*C-@g<78Y8?yl}}>HgYO^@ZWts-SXQ}gF4sh`yEzwvl+5iq;1>r zSx#;1%t!7)3%zBg->4|HF;p*SUMr{g(f>yMrZ`@=!~R?Mx=(%^)g&~7i>twW8Rz-p zAEBrFxen{zzI(X5G`;N8mU%U-<_xF0PPPS^&DZ7kc~ZK1yOoW2x!9%CyPTP95+j<{ z$uN|s6dzc6`Mm!RiQ{p8-A9y2dSIygPgl3jD~g!=p`GEBx@Gk*L# zWN|O@?YrEydnOl(d}iS^$`4_!%juaOlgEAVWk?)%E+RLdLQ zsplCUD>S*sTyyPjT$Vl4_05}Yfrp<&xr!XTCHPExwdSYmd6&8y%D=omYPY{)`cH9% zAC1dn99bqFEqInG#c|DcQr;9%xro%Hr5kSVusEP@BbEKf^^(xLPrT=j-AqxPxH$Ki zaXX`PUX0uuk@9nvtEbxucm0UodiC$yGuNK%3AR>9nJ~e*VHMj0Hg<`F$}+QR;-n(% zu58Q_-*C{dJR*6b)1f!FyA^NG+i8*Hu97Sh!1`7{P-^L-a|+XLtMpFJJ%2@1)x&PV z#IV=)t``?wcpJyDq_X_Pr9G2aS!aB;2v}uR#{BtgMli#oE=`={TxYHSId}hOi2GFk z!ZZ46^YZ!=h4ruR`TynAu4*v;2U*nRMx-S(}s4m(}{T5@Umzq>PaQZAi)85A~udZ^(R9jO`UtKdD)Lwr{v{c&NJNh@1)0;+m*>T>aL3f#Kd|8Uh=r+xX;JfcY?#+tzmfp$g_9r)g&&6{t;Hh z67^))9PVT_Pv)Gb+hiuVv#&7SyEN;X)w|ibD_(A&_N#4vsnluKyK7eK1S?AN^et&) zDN%T|FQdM7pR%0L?rZC8R3Amlmc2e!UZ;BL%YO!*EUAbqCjLgu3~9f8=BG^9@mcb6 zNrU7$bHQ2P^IHXe%)0;ZeEZ{%6Sl{T=&t&^Cu)^e<(zN+lgkWur=Q+!U2!z%YW(Go z;o0ZTyZvW4?^)$_I=7xZ^<`=Le+J(5k**>^;QkG$k3%dD=lq!d=b^1twA zM{L-+tLs{HubM`z&Y4Nag=~&_26M%Q%IiIQeO&bJ^}f2RW^$_6|4Ep2KFnu-Ri}GJ zUw_Rr&E6A#9ixW&AwS2>#|S%BlrH6d$$j6 zPvQ*<+O_Ile6+h+xBR4YQm*c$Gv0GcM$Fx~zs!SeUnM!4sRREhb>IHS^+)I4?#%0H z=I^%cuAl$->CSiC>c!sb)#>?v_?OEbwQ*mpU92B(&@+v&Q|~nHsZ4M=6s)MY-0`#9 z;&q%yqb}GP{a`*iYl~D|=i~XjudU`hy{4^eAHIEFCYMG!SI3jNs~jz4m^jzlUvKZ0 z|F-19*9tqi*W2SHHeY!6C9?FzyDt~>pT+oRYfN#wo>VNw5o7*td%ARJyXmw2+;$8V z#}EB_-lX^5##(myxAJe|w^gEN7X`ohCN;-gzVxJZ+%wU~miH%4km&5*YjN`Z=Kl-_ zmaP9Cxj$gqq*<||*Z(oi-S>O(%YMEZ_Yc#1N@Lx9F4zPvdOd5}tlRo)zT9<^?pzyf zab!i0mrRpgf~Dw1ArT=zh9{HbSby7AI3JVa`KaN+^4qVa$fqj z9iKM|U5YkK&n}p~`P(%8xij9!y|2#Nel1+=jMtirTfninCtAHF-r@>Ta%ANc+)@J=Sb`c3)3zLT?V#Vdttu+J8I zsa$(%$&{bdSG=qf{Na7%KLgi3nTq$3n?96$&aPUvSZ(>zA9l52`df4YZ#!n^=j&dq zR2DLd);7*B;QssUSk~3`)^E0b_MUdX_UU)sUCZmsZtwjwe{bohO;^9azP$U#;g5Nj zADnLskGoL{=|}GO-_k$yHZIogwa2xs z(>7h+zs)pqONY`VTdl*@>bKJGD+%rNG}y=~({ZQez}b|#E4S)9{{(*U4!KZc`rxno zkz421ZFK#X&Ar;9b=`GOZjbNAN4Wkht@AyuQrPD{`%n6h=tuYEvVR@#(6M1&argYA z^*qi?59gMyz8!7XSTC`+H+7HTohh^A_5PlW==pQ}JEy|BqG!?Cul_UW+J(RVwCiQz z)2z3*^UuaVPy6@!=9|BV56_qUVlPs0tDg5}^u2ek^G{rvoBUh%vXi2*PKssC@>@zz zFNT0mQ}PyXDe2e~UHGVUbNt(zZx&yk_hGl#Z!7B)8yoztO3&>)!18g<+nAcaAEUoj zUdhSXGS~UgteRcVHf{SD5yz3VAy~^fcN3$QpOEFl&y&pRElw)+e~tF{PulpS)%cHp z`+@aKw`>p0&|DLr752{VeBN2}sWR3c=VF$eS$;`5YOC~8@m0INUfjKY>VF1ZpWx}b zPqvihpAMN+Z(Uba^0t$UQ=Os6bG`nL^50G`>;?ZgXD+>!S#@rv-11D*f88sx0$iru zI=jVhrOLIVDrsEY#|pKM9gQ&v+H-yBy=d;!o^NkVjSPhB^gmtCV8>c9{ZO6J2kWeT zQqvvV5AL!(Hn)7$>6J^*MBP2ucI%pC?xM(;!}1ezB%7DGS2J$inJjQE*E_qe|I?)-4bwepYiB{S>!Ym`@4Opcnl_RCtmyYKR31$O5KUJNNZalk<_slGEmwK#uPs-kD z(+~Xzf4y%q^_Q=>{!c(R-6#6Bx3;eJq1;tb_p*#R;vPk&7~5WK$)EPgcj5Af$@1lj zE@nTYW7q$!{F%3F(eA3-zdqf)pMG(!!rx_k%wy9=LIVA)PC#E zTdUjh@lMPt!!DOu_dMsR@Em^L`)G=&g~jW*dZvn_`_t;he^`BY+S48UQS`v2oJ$$H zQ6E$<9Txc>o|`_S?|63mXU;pxF89hO&yD(iD^@D|d}!+5tnK?VquqX6FMs;{Z^nNH z`-NN2bG-PccE!Z|t=GwXy^`q7waa(!PLe$1FFtdNjNwh?#vL+wCk`H5JL|Dc>Eo{- zSf4kWocMCOH}3Tjt36RRfd?M?Xg~GTnY^uPk&4gp59`h?U%c)5U-6guFFE7=a^L=| zUwa6C4x~r`yXoWk;`b};4smz$bH1+ApB8B*9m{j8Q(59W$9nDU ziW3>zXNhe5;U021H)pL*%9Oo(r{2E9GvjessBzT7DI!J7pPxLtQ{`6p^)F>_mz%DC zS{uAE({;=IudDW-yS{zymaOMjcd2bnedH^4IP7ws*n7YI+xuSKdT94%qE(xl+!2B7 zbEWrhDLY-TNvvLUdP#Qh%Ufj&vzNSzQ*M&*o10i!q-=0d)2P$auEa8>cVf-eJEw0u zAKaz4{ln^m`+5FIFV8rB#H`G$AX94Is_mT&8~dbVIKSDS)yNbK&iZ8jROHN`WN4REq$GN zt88z{wq=JtJ#qWQaXIC!(bME2$w#3UuTPv?*Z6Vz0Xdzk3od=}m-&$&dF8p@#>mWH zd-r_y2z0gxaaVa8#ChO>fNy1d_a51`6~`Y>Z~gk3zvV+l)xPdk20K4KTc|f*W5;Ed zqJ?d;lS~h0GOuGk)@bDSK2H5%`T>54AMr6CT<<<_Pu}wH+`mJ=Q@V@uylUqi4sLt= zZl`+ookM%ltv?wt3r>lBTz*%7|N7JGy0>oGeJRQ-?8W-e;g_GM=Kg1p)?a30`BDB* zoZ;0{S!0KfZr$s$qOx=LZN6N}k*Bsb=dsWtLyaPipAApKT|5FjEtn_yR*1j(`jB6u z!uZJKm${2ySFs;Ht!r{HdD)Lyk9~K}yY=|o32)Dwn{(G5$~%86VqaPE-s-(?SKW_{ zi8j>Q>~?XIRfC(x+$)xgS8sp3{*Gy0x%k%q4C{l>)~J6px>w(N@yB8353Yaju3KGg z@+~U9{c-C${p3mMH%sm%yGltq2u*rtxF@x9)r5bSc8T6^TdTj;gkRw9>+4ZlZb?-H zX=k`C(@zZg>~e6azzUbxXU_W5x2%(S!hG&W+WSZRhwtA^cf4d{{NVT6^kctNx5UYb z`6M>zPq?3+eN{eD~h} z46^^qwofX*y7*NrckuMci*Md9zr5~C=kgxw8~1(psrl$dRxaThP*Igs@qeXRbWYp<`mok}3e;1!quU<82 zqES=TgS0Js1V7e$e0<(fYO8eW(!X2YhRR0^%O-ZLSMDp{KFMR-ZUb(eycrV%`-~Xb zO`px*!ruIKeaAfI%ef`{TLrSLzD=!9FI@EFd*F2c>ReNojVF!YR`MQD&|Y`6vsp&= z@QSq`tsnGlT>i&t(?|a1Fa2*Vw@NSDy+5t%V1l@|jvL#q&a2EvI`ZtRy1qMZOZ+%X z`Mqh~+P}A=f8Dy3ze=@a?sm`mgTMclf4cBJ*7%T=TmK{3^w=$%BDU5hpA^4dJ#$u~ zP)=v1iq;Bs#sZ@=g_At5&hl4NpPX%XRd*{}#N!QeTTi@*7i8NH8h8|H_nDh?L3hp5 z*&%v1E7j&$S2C2|S+U~ztjsebm9?HiB$(f+YM`(xDQ*h#$n z5g)!@>t1O#y*zpCaW2129gC(bxaxFoS#rY7!`6{mbnm0Lb>9z1scgSh-)?Fz8nald z*W6>$RXrKurUQ4*@>IzB#mGBkpZd>`e)MDP_U?VEryt2PWX}`Ya^3XUwVZWZt@m1` zRGeMAMlMq9da=>T8L>NKcNcz6?|QFWN}bEmCW zv6|VLHu3Ex`!_dLc4wRPJ&Zh@XRh&4P+~Hxr$vn6b5nc%ivJ7``48yx{761n#rwT; zk8jaI)3x5d^Umh!rp(xLefq|=S1ue^>DAtMRKFp{kVjh9<$>5SDa0{)uMNa-zzTNoBUce@B`49h?52@!CFLA8oeYeVF$5(glXPKLQrPt)_3@TJ^ zJ0gIJl3b-#o4y46 zXL!>!{m8ak=1p&Ff?pys6Mk?$-7vkI=V7IKqz#B^fHGPdt4tp*}m<{!dRul zZ%;pxHP>Et?2^(^+tnwxny)D6@v!D#khA8xn`$XNx&P|7vL8$D)>r>N{cG#brSl6- z5|)46eK-C=d#3%<`*-WFzP*JKK<+3G3}wo&QjqEps}lcK|Lxemw9VP9*}GR)gh~os zJyPl0Fo~h$fDLn#cH@uU!euY-@mV zhHPnnv5I-eJ@#FQ_x_#IB(dCky#;%6D@#rcQ-SgMsP7MojN|A0XW;jx}T^$mI@*n^8hgZJ3FCgUc)%vWu-*47E3b(pb zKP6@kE1lA<$L~sao;WmdPW;FBN44Xb?!DL*V`aAXQN2L6 zTy|u}Eu+KVI+Z&#rv+U;E#lF+mr?Ka=@pXdqNjg+e;D5Pr}jhI`-A3jrWX{i^x${FO@#;3jEv?LblpLENOq-hgcrTaqYUdSt zt;#RAhh1monP|Ik_h)v4L(beiZBA8&Q7dcwZpVN5&tUuOefEn)UsH~i`MI_A>o4cJ z{n-Ak^>U4>)$RSQd%RcPb6m1)=jx~%(FZPNT+7|TsaJkx!JOmz?;aXiewx#&kn$_H z_3yrwiMiF8TIaKirOxuKym+xe>+p>x-irJq{x>UaDj)6k7rEp6P&W98QD#79)RcXi zA6lxm?m47Z7Mc6l_}I1gGgD67d*v7qGL>!bao>&oK9|?#Jo0$lbx);JNa9GwLI(z| zK9Rx$GF;gY%kTYXXubb3-}e1e|3_iB?l1bA|D)#3MJ#7Iz|M5QMc2Ijxc*zyNAtsf z>yI0~Oh0^WmwaPMv~PaUwrfgL*KG}%`Rq*DC7l%;9yo!i0_{gTUl_*UrGzWJqUix3G`b1ZN>(1Wu?aI5ahMB3KjrP7irFM3eP)o<02a`B#uG)+JJ9}N~KSPJ< z!^B4!CF?mWCjC1cbnQ#=vdZm3XEPK-1!8`#QVfsI2tDks_|r3J$wmLtvs?c&#NMB} z{{986oS)To-TBt{e&_Gh+xzi&nEx&QLv@@N*Q{A*V_IBTy!_R#DG$#q`lxg7rD8)H zmzkF0gbn#$Pc3FKv&vZ=d!izwFxm5C0iH=s){n^}f|!{Ov!z8*}}AK0MpH{Dram zvm39a+Hcl$f4LK0k}dLHspQ+a+mif z-_?ZuDc$~p>(0C1@gMAJX?sND3EJ*9U+t@Ie)-LRh9}#8{t5qj?N_8}oy)wv^`El; z?32!anzvJ4_MhRaeas)#+xD0*y#FFf_uFdLqi=St?^ye6=e9PBr`ww>S^_<##iv#8 z2r&DjamOQ%wQ}kEx4Dmcvk!&Wsa@2qv0ZpGBaV0b$FkMQ>vT4qu8Dmogx5gsgre8k`vZv{^lGW7cKD0eofL~x{9xYlM>4y9(eVW@c9nn2zV~XMY=6MjZySzmx^;I; zmu|`gvC|y)S)MRBta+;!t-kjA%WrG`itjGHx^{cs zETI|?tFq0@|8_g<-BP{QOk>OG>ANS{ZZHXQdli|t=b+BcK9RfpQ$NqkzT0?${ra^% zlOJt=^Lgoy{|v61H(lzRz2G$Wzi*yjHkM3Y(YU}pRmja}qc&TY)A7@(?3wFko#K~p z{^%rKZ)pFJdCP@Rp3QHnyZ5>-J66Q^KuG0u3d6C)p0^fPKW#s?e`(sg{|x$ZCr%ct zR%YM7_A>MP{fpUUW#;CbnfW@OWZb31lV32LTtCN-?ce2D$B)!Y{-`=0TFaHqFOqpV z%dBixNU-X)zSA<|g3))aPn0gk2Y@c}D zdGk8PkL>MvrSc-ZH`GxLqz=Wp?Twtv3TlE7nYR!ea4g={?Q zVrPDK>wfoNx9-o~Qm)_b|N7Up=b0a;KjN9sZ71=g`KUnTy2Yi53})B*ev4(^on|q0 zMmksQb(Ipy^;SQkKQI<`ik$AKK4V6Z-Jl$FjYJkKXDBef+jxbnAwzOD@dS(OtMbOi*Eqc=YX=YH4;A zr#+8dV7g-cX?K49%c^yIts(!fS^Zr<2)NV`ftES)A%1?TI>Av-;O;3N`zx?NS=^y@*K1ZFb*QYkH z)M{Qm_Jha`;=z0G&;MCJ|G|-UYvz6X&tU)Q4``bO4`?qC2tU|o{Gq{lQ|3&ZCdrU8u`w=l<)4C&U+%No$<{b*zPu$?`FGTJ z-{y$RrPh&8rY_#Kx9w)seW8UNQ>UD;Iq)PkWS!&s*pKh}AInB@`~PtM-U-&35Nva4*d+}hfeucL(T zdsY`sUtPteQXZhAk;Xgq=RP+su8BJ)UcCFC!KCip{`TdI@5t>gtN&oX{;1r~c=H;^ zzl&>JANu!-%ZYBEeK=6Zs&9H~>cj3W6SvMS<6hL1y*pf()#JfkHODUhuY8dc1hQ|f z+Y&Dl>-*3%{K&U!ZHJuIw)1YAX0tX#BC^a)&OJL-XzGN-ISY+zAs#{ z{K(|n73=ODKRi$M>X*OSp&KWAd|rR_=z<1?ZF@dXacYTnKkTX)6tgn+j^L#w`CGsK zzW%cQ)6FIS*mK|ht&gjI*F8_+$J13ue>?mLJ{V;>b?Wi9b$WVhOl!Roo&{D+^Do(O zz?qd*RNzE}`LCyeP12%!n14tAs6PHr>Ef3eHM);_RkD@|=GU%t%-Z&!VP8|`1C`?i zZ+{=zK3^iZSx!3d>7C#I8E)zyTK>qtttNJ9mes$hYLi#reQhpP^`kj>(yeFJEt4xF zdaqCAF!G3PQoAncCLPhWmiRr~s@2->mp|V9v*gn|%Y5y!vai?f*G>P+_xhp#(cksm z+thA(KY(|!kR{w=eiBH!WPiLNIzUD_VKt@&B=^T#cfp9Yd` zmXEJ5?Nsl4Jkfda%CO)I%~t}gKUL2c-7lN_i#K}i{p^42Q)+BJ@`D=M){Y<5X0FII zsAxaJv1^G@wpgy|*11dGq;~0RiFGc~6n5LQYm%et>a(Wtx3rJHd{8U)P4(hhJ^n6J zUhaU8yvy6R@@5_K&dzwI^iJA4^IB^7B-5T}?p)4AhuL;7`=R~l$A5XeO6|GiP(0PzBr3po{}PS{;YZ9?&K-@ zXusTlhK#fM53l!>>dHG#c4)|$Wm0wYjMbH3PrcPDO)oE?csk3MN=|65~l ztT0ku%#P`moc;B!wa#uIBR*V-Q@qIDUixFU-|yg1>!+Dr7LRQ2oxS zX0N4p*7Lppbn#dG>1C^KPkFOA^IQC^{pYT~zV2QdKDXlYw_R6E?1i$-t)ep~RusGS z9g6O?4roieQfe|wBTrB2p34N4I2Un)eX=U|JSF|M-bI{uc)!bp;CYA9N9S)A|MqO{ z>gbpARAQA6-uiL5ZTG@$^PitC()p&7B;9XXGvVV=k)pglpH($WIn}n=snwm|7H4(c zWUjlCLC^~uyHooCPJt(zBm;Zs@Xi6Sv~X}@g$^10CucmLaKUHX1m zTlVamx8g;oFTMUXt?H>!zVWGg?fltW-rWCQeB18oi5Eoes{Z;;Vlj(iyo{J6!|$%M zZ!)%|&-lah@p#vsn3}@nKlW~^EcNGm{dTF&E!8dCK6uLC%ze(XX3~k$&$NfB1YP&ftoT^P{)?N5bwY@3&er?N@sHvdiSNh>U!=w5m@}2(~cuH-H&WC>Z&(ONgq&Mu< zwcOv`kJbJc-@f_! zW5LcHyPLgsHE|}-d$LNSZ0@zQkGC0~2}`RtopEeNV(g@U=c|^!Kin@Eirj57%A?jdp@AZ*b<)SOh+l4@MhmAFa=t&vXCB{oeV9_O6;~B6!Cw$c1nD3Z>l!6)PWX6gYA@ys-Sr=gl&_Hm5m**+aKV zuC~sr%e(dR*SWdnvPZXW{kryyj-+arxx1Szhw_B15BonRT&v%D^xP)X{Gb0BKC6EI zmHapT-pk`rf0^whF7^M}ari$&&Bgx=9LJ{qXRtH)&v4)L`=!TOm&&Kf=`WA%Kb`z= zn;h50qrTa-)BgVTKW{nth`46SrgclT?s{#G(eC7!<$8DPef!F`X9sQm9KUk;{rw}e zR!oW8wz@)fioe1RBfFnWZ~2O^Zan61@o2wO{X>aM`=1|^|NX50hnvazpC8-*{apOF zY;E=5IQ#yu7uo+f=U9hTto(cW*QXtYkVW&%rkIJhj zZFm|!`M~4J{#&lpL?%|3R%REkjgE_D($iC%lsYM)sy*>^o#`)DWIvhIA2M88f4;~5 z`&sYDUhBReJ{~l^-6T0X>G`s~w*2R_(>}`1zgc&#)Wh!7d)@uhpXpC}+i+&a?Y0jvIhbNhR0ZI|KhNWzgKJ9?8C<$ z;$#B53L|zd`SNo6m;Tm$B0uyauI{N_IOC1F$J+NwJ0^K5Ypq)GuwVUU*Y9_R9apNj zR%S1{abw>P-Nl`ILqc!;ta)s)dd}5euZe+A!#w4458ttMZeQWoE4j1SdfVGucbU30 z*3Y+F18O*8!+1BB%PIbj+21ky;kEY6hp}0o9k0Ew@mwBtuspTk!^X7fEk#R&r3IKZ zAGSG)K9W*DzFz)^_0f6y`>($J8c{l5EK5o!Dz9}_R9u!}MXg%j-yNH_pXs^hI&1TB zu}hcCb=V?mvp>q&b2=|@UR`nR({bO6CDA8L4eM9N{hf60TE-oTC6(M-4HA744<<3P z?d*={`lJ2har)})TcOvcKRR6-xOVZ5t6OYimoj+=on`FGQ2l#m;?FhCZlU|~{JjEp z1x4@6{?BlG-`nfwr~d1W?F%cb&di^`_xr8a{(Ex@njatUdip_j&GyYvKhlr%uQ|W$ zpzeQ$rmfv=R~PgdD$Kj?74E%(BSE^@z^VVU!9kNf*1sKoe1Bv+y)pRkg^GH&z1K=~ zvjg`}54-$sS;X@VyV`DWJp7zk8Y$(bnqoacaL!c4y-D$nb^`N#F6S(1k0`smeA2UP zfA=`9sNE~)dRzTh{QTz|QW-?kwHzd6xR!p{en6g&t#RS2c|4a7TsHY#>+Ey+snDH9 znc&#G-TgUhC(m5?>cGV9PZj;O!ZSbZt&a;^|DT~T@@D4K3(AXq*V;e0|LJmU&AHhR z>wBUf$sf+=`mxA6F%{(?-o+00f|efS=H}PQ=Z#zV z*yEi}ea88gT~p_Gn6_=b_V`NTuFIE$W@KJgDRX_CznSgR={)Q0qUy^{7k--*y-N>1*1l^O+aPe3cS^X%8V=Toj&lzTZZeBFyG)<`H~;hY<+GFL zZOT7c=XGCu*?$I=t@}+My7)-S#!3z-7OX|`_GV4 z-ORLP^2r@q$JlPCGcZQg@&0(azM?I^owGP|x#CB)t?OQ1Ptth8s=UcONm(xDVgJp` zg_i8P4Lc?7UHYd|BeVI(`qj5@Ry;q{{zz=qhnMBE3O=$J#akbUanI8|vvlH=J@b`T zSSER>ZTYgk;U34un%LY&*|usGU$diUuRfi0YwG2Lxuz!)3ob@)S*G#l?3)NJ2G6@a zpFQIzs&3m~?_INIzjT(+kJ+!6KVAE;{AcEcr*^D=qCV>P);O-nlrx@m-1Q?%*|GQk z1nb{OpO&6&BJ@Tj)>r#zt;DHj9j?`rjMyC3r|h5mHYZQ5&sh7owAF%FM|c`z`kVgo z{fItbb@bTUCAZ^GuJmQUS$B0+*7sT6N7>%q3vqiEX|v*c!khlAN$fIQZ!=~+YuAaL zyZ`(@wg+2gSFirh5P!+em)55*#$MiI_~G2+2iI0Ejm;7~|KZ=cUvKX{58P=s;f76k z-W#PIa*@4?Gd=oTbJiWb^_=0_{I-8GH5MPk4{^`ueUY8Ju0lArFWKf>)tc?`x0u4z zBBeffUGjKy^US=&r0S1hbLZTOezQA%+Uck1x!=pSZ4U~6;``?7=k2EVSMS<&Ikzgh z-Z6)uOX@+e-|7>M3 zcmDis^GE+<^GrUd&0O6Ze6(uY%ES-9biKUi^JeKLvQ4f?n=2f$&vhc}l_DMMKT|zV z2%PNj`;h%QzUQCJkMzQaZ?*C}X7x^MTz2F1lN+zXqn^xiKK8Da$FpeCksl?eHsv2~ zY~n4n7uhoZ=4sD;mrc$~RZKqgPk2YSN7pf4JAcvt49T_2eOOxxPV$rsa(CD7Z<%0K zbVFv=*^D*w<1fW`?YbSSUn{!S`rR)5bLLbPO5^Uw5%#cdYPi%mCqPgH;3Q(YV%c+e=uS!Uy|T~=*j&Qle7 zn)nN2#P%=zWBp@Q(ubo*pSCS7TzX{ly7^19*5()HvrK#=DfUUbBhLI*$e+F?OdP8( z)R=qJX{-0_-;yeD`$hJm$d`he9*J!EW}bf1JkdE`Y!|mE7nK*T-ZpXDTw9&*pZCr0 zHuPLtefR6vUAIqNuWx!$I(_rjt#dvrNy(2&V|%glkNw-iZ!P_{vsApV zgnO@dfYq|~-%dxbbTOOGR2^ZyVYA1)%G^I&LwJ}baUKr8QTkM#dynFW>24pl?Nx7l z_ht8f!5ic3kO) zX8&uZ_ChsYKPp$p{aEjFY3-XWw_lg;`zSJf*4)WH-!+w%ewg@#Tj|zu4e3wwiW#>r z_@+@OS>f#Z;d+(@9|8=vNRZ}FeOBki-q+c4&LJsNLV3+n_g+VMx$ z7YaEe z=Tt-JB*(3HBZO~Ousi+m|8V}{+`ichUvG>5(44*e#0IP7ovBxH^^UCG;+n5|<`dHz z{s+%W5<2h)~L*Vx@+Fs_j)WBb@v3HHJ|Y>+N9uk zTLM$7&x%Les?OeD9rkW%>Ccka+b42e>yB1=|7`#INx#nrBOl**UHdd`)%xb{6SleVSqxGn;u=$X-2h@a_hl;#qIB%y)nOWAdM&`Fmwk*rxmb z$K!w0&j@rCfsgEclD~E8(4+dBpO0I$&fB~uqx_cJ#PXF~>gx@JHS4-RmA~yST;m;-A2knJZPlUe+ntGch{GZBuUg zy`u-M19u)+AMs=Ihs%yXwm!`6=c|}z{qsxqoX)pzOubh#8%@;;a^F75tVFRxE_jJL^j=D{ttWTP3sa?%_NG zLrwL?ude@*F3Zj6%3W>oCFrkiy~?$H%NPDA_q1YbT$b7P?%CfpYZq@fFQ2q!^O-=W z_Y-eS0<9xDsdBxoChLRkwuK+F512*S?R<3B{z%okHQT>8ojy8iiP^-=YgwCO;^02LDP4WN>6>5w8JbGsMPAFvU)`$hTwJ^0&fa&kHRibpocd_vb2PM}W=WsH zgTCUPhHUnLtNY|K?Zw{A`p*z^ajl(VT;_j{C)dx+Zk@Vwv|`; z$Uc=>r(LeB-IKk-J2ykM;itgaC1TM9^LJ0?+kGM}C?vVy)DPd^o;7YC{k!&P^6eLI zz3*|2Z~im)WjA6D7c_l3_o?E;`-0cMw{22*>i1D!+N|c@^2`VGTRzmJ-2J;czw#rS z_~f8&*L@#Nk6xM9t2E)ez{^kPPI79UF+VQ!;rJY>`xon2f5;!c{Bm;UFZG*GF4vv@ z7WO;l?bE-rG#53$w>)=c=ZxYx?@yj&=^^f#jxAO11xAv;5 zF2`SZQa6w-fX7k3d$0aySpQOv{lk8rOQrQUoexKC@A%_fzcBKZRZ+#{sH{$=nAKq=-Xmh%}a*gkj!9*sFt?T(}M7R7XToU){p3N+N&XYEiUjF*~{79^0&GQMY zDc;}q-g{A2zgkH2!GqG}YD>>u>%4U@+Vu0K)2F`u*djJ(<*TdjpYN*I&HMBAcHHuB z+1IxERzGNO%_}_{lRfK_qw3bL4^-ycX~t

p1A0IFYR1xMyMhqxA}Z7uZBUs{Ch= z{VnWU)zR;*f8-xV*9*qndTFOI+fjdC_IxAX51QGXOwkMzcB{nK7pJVPp1S@}J=3dt z{yp{&Ps{4v{G+(t0%if@0YM0+8?!JECyVgrAA?oHuwf?t(V#3KKCED5BrwaE`8}=xa@rG>N`Ic{o<~ccAqD3 z|D%>mcJKZ2(tfee{-gY%_g%NPuJ2g4UjNJ6pl2>!?*n%36>D_8c;;X9 zq5G*3k7scd3;msW?po&5*`jxT{5~+BJ^c7Amw5(P-mKYr*XKXTnhZFML z43#C8v3fd`%?a+E?3-Izd+z@9Wxw8e`%hh&pYiwAe}?(%R(s37Es7Vf@vePvzE#fn z`j?~pD_%zFKK;6B(>u0^2jza&pB8uZJN>9Cncq22^JCAgHQy(_HoI09aAQK4wnYM8 z0ox z)=QI`i@&Y2<|$pQ(^}uI|GxC~UGtz1tFn1+ecMo~HmhVu#L*`{v3ok+i3Qb62n|2= zP<+ns{|vo*jF(qzKD^ybYU{nl$%#iU`7h17Zk2LE{Mq^Wxhh2(k0(elSv?6C{@{J+ zKLgj6u&d90WY;fywO)Su@_VOK*X~U$i^2la-1 z!ms8@?GboU$7&F>^!;s9&BR`%9p#zZV{95P7<=7$FLAfAPdnf`$@`#63SuY6^7y<@BS=1pziL%V8&em(Mz zlzM({N9R4?i+A67hFw*j=le>)#9Mhz_|6~uBYp(depv2PFHq5Zctef#{5Q)^y~|Cq zuC*%^UNb?%j#m$KdwD~Nukv{!3dr^)W^ zpSFbVXaC-N@8!O0e^UQtXe{c&95K|KT)EqQ>gKZgev6}aT?}jg&HT^cWB>7${bl=? z?f-1UG8S;%e>wll;r|SBi~kz`n^FJxYTDpdmSUH8`PrMh^Ba0mC3oAjfKw67Jq?GIcf5{~@BgD-{Gr+9qj;xH_&WDh zuj?$Xefgced#~8HdXAUVk~CtIjbq()E<3bn^6Py|PwQo1X4?Dvk$L}?{|s$*{ChvD zt>is>w0H5Le%V>8y?iDu@oqS?_`QYtyqT{rZMAWHWUGEC%J$saby?G@%%*1Te!lOD zo)CwU;CbljvuSz`q5eYNS^0M*yW@D4EJ8#!!F*n z^{n0J=*=HZPpymyF74r8HIe6%i?`=CH?7o9k6E@A+Mz|Z{MGC1 ze)Ii5-=$ysxBkiPs!HAcr<1;l1O|a7rg83z#<9be@gx7+{FO)3KXO0PU9xN1#D#jx zA972ZU)~xK<$YgN>1@$|hLlV93qCE6J<828=iM~pr|T{pn|{==n`J#P`dHcIf z_X}r)@6v0uol*V!M`+1?r3%%%JCnC&o z?Ozn_E`56Si%Xep0_Prz8OnU$=yQJOl*D999fq^6YwIpn$+bVay;m+*JZ7^0hW%SE zZPgbz%A@Xo`MsV?nuMxapr-n{7BP>zA}4mw2=u-?C3p4ye|P3TPnmb?W!j--+w-@- znEqtj{e4Gu3)UZzzvb<6Nrcm6KXZg^-shg?6?fU2F8c3s(wi^4=fwG?mHmhNyQZ7} zGx-twF!E1mY3U=0J)Vy$O4k~`kMq2w@$XjdrKg-H0`>*F$v3T9Y7x^P-TtlT!n2PD zF0gg3TicxN`|#*TX-EEV)mdJxXE*$PKX?9=b^jT(Pv0@zZ+idMkNr;j%#?0NU0%og zXR~95((c}U7geJ7TeJNHg!jY+PvTfKm9 zA^&XI*^X+bJM)BlPm5k^pKQT=I?01!;*Edj>kKaa;r(#`QGM^%y+!x(+4MW~N-ehj zjt-i(Jz(wHO)+;5%T)hoC|5ldV{s!w_DECla5?gdbRY)lGlIr zgI&C5YtQ(&OOwfZp+#!Ki8o6lJyfJFnpaJVKJ}kL`&0b8E!BDJF9dmA@K615d)EEh zd%NDez49^tmUGyai~BVGSbY>*|A^QAaGaQw-}1a&=hkOzzIn7*Wt#s}qupBV^KM!1 zm@s+HW(H1cfBVHRfA#)ncyseZ^uy3E!na<<>3Ua3pN?(_%g$9|{MJ3Spfcvb#^?!E zk_XmT|CszS?efE`&n7NcYhP2bIQptsWo}6?mqpyeg{`5>X3SjL%cS;&U3XHfQ=7VI z%l$2R?JNH?9J1Rddg1GP8Q(RT6SKX=T3Hls-SjA1a(Q-VQs`-RMn*lK{8npUAG4=X zul8G7+r6&WR@=Ry{O-5&Nxv6*a;iSA3k>w@_^6vUvZJ4M%m~1a{o> z2#RhNnH0V0xXH0=7MH|@+ieuvRjyy$cF*GCvX7$u?QtKR*WZ14Jfz=urq|hTrKxU3 zhK+5dLB?0Esdav=_qbkDUHQ->=l!Fd4R@5{eI|BicPf0^^W!pKjL7FlCrd&te|jfe zcNHgQf34sp!&s}TXz!Z0pKdQ$n!RoN-@Nrd;-BvicKe!It=-z#bS^njTJCHg|MPf} zy3^XK*Zb{6B5uB#_WjrN_8Q~*%tyL*x6U}*bt`(iOuF^;u+XQ|l`^08J}l`y!ep2H z?WLll$^$-6)2II#`2I6w#IirK@5nXT?W-SoS^3Iawfz@MdA&F$-HzPWc~xcBjU_z? z9V^bMM%-B4#uYwGamnr2@Rz##eDAOJ-g;cW{nxwwOK&{;&yc|{9P!8R@7#TA2OoMa zy>{ETa?4(?dzU0_&R+0p`*cS$Am(TF^`|Um1;(=;@8hcIKiq!QUaTf|MaAmFx6U`% z9bM{P_$ahX+Ey)k#@T~;-difWBy|?0-Tl6p;X&`>7yG3vPJ92TekdD#WY(3l`Aaj) z-WM0VXuX})*R^Xbk>;{Kl`}xaITiC%hsN~_RC-1EO~dT>SpDChRHumcHjKwe%_wvORf52@i*-c ze$VHAJ*{f$x#lHzZ@fzP+|&2WVaVsMNUxA}5U;YVxfNBwVZ zKU#IqUa6Y4?(uJ{Q_qgQ(w-fepBlltD(bXM;1iY0ZZi}%dGsy4o%i_V;cv4(-274d zaK3o2dX$;f+n!sCZv70XU3Bh}0MBP1o4Y&ui=HN@C(oZ6J*QM&Y)|O&ALa)?{^$<6 zq;Rd|tkt(`)y-yiZXQbwHP4dtKP!-Lyz$T+f$a2;UVo$F`hNXqh}JE=Gv%MyuGn8c z>*t^L?Oj`b^p=0~ql?#Lt|V=o9K5I9XNyt#_wPZ>n@kV9y6yiodTQ^w3U~L9W(ya^ zXN7rJ7B9Lx`>pNW+XCm3D)yXR@UZWs4DW%*Wo~@DIakW}*B|!3_Mf3)($-10?)U9q zx?f-s^e8@1B^rL-erx?vdzOl%bMh?NR{Psb{Dms^9jX?&QsZ05x_57HY%q$vRk_GjZoZl43*N%* z{~5NV&6&q==}-2OAO1(~DSx`@5B|`q4U}4`#g&y)<{X$mdSDi+E4eUjmE^NICS>I_~B-u{#J@z|X2yJDx`wh4WBy6r>HN8{{yiUxlt zEWO&YyM4Nlfauhu$^UGho<0;a>vyv2HJv}sizBjj)f{?s=+`~#)s+njp3*B)bL89{ z*=K!TFZA!iFZR|ynS~4PWL6zLy0rK1y!k6%fBSHxuP)mi6?fbw=v!FdwSW7KkH%N^M7J@)ikC~~ zI^`O@KeB1xyj@lPQtS!hZ{sr*-UV>9uY6})rLzzA8m=~5wSQN|X%D9*^C~8YCo%mwbA-XS=$8A@ z`|a{<|9Fl+;X+Vc(e|zm*?ak&;(vyS$&z_Of(;=}n>HjI&U>}fT;0<4 z-apSB+xGpA{qnu+m42yfNqqN~Nq_X0$Ch8Z@=r9oj{S%E2kC=_&W~R6rXOy7a5>lX zti{i7+oy*MzX=qdyoV)o&Z-6v#|A%f(}(-Fv8|SEKO!%A`}FFJeUeiB4bu)zk83%t z-*IvI4z=Tc)>B-I_WU`RrgC2Jli!8x*ZS@MtS;?St1$QeXx4SvZ1##HDLpZ6$x}I! z*7Yraw`tR+iLaKgEsB?@Gxz^cbnJ8Rt{?S}`UPH?h3(JUdG$+X$$2$BkDHqwebS8l zRj_f+I|iW&u4S8d-+TYjx2m+xKfAo-OYgMZb-eZ$E7!ii{j6T1Vt&L2@uTrluLE8m zclv1hBu+k`=ZAT~rtPyH$8G<#$Wyo6tJkT^iOato2TvT{y_Yww)Kkl zdwPwdW))_qY@aB327I1f_@vuHr*)?L{AWnhI=@vu_fRW$+{2H|kIPy=+z$AVH-B~1 zEBzHemi^=Aj`oh2_+-10k@{@=b4nJF8yejT5rzwW!S$sv`SLZ3WB1N@jCq|SJ( zGPyYX>RJD*wIlSXbnX-wd4Vs#bc1Hhy1s&ZN2=Au`j*Jut2&@qOw_Q zr{wn+`_wC>kNjugePWWz++BXu#8c$ci>+ILfA;!bg;UHl#U1xo%i zw9S+Lcm0a)yZ1fQ4I-m%y!^QOPVAkt3Vj;MJg0WgoWZkMt8tPC!>ku;?p)Unt6D#M z^3Uz%+4;3{+YOeB%InTO{%U+yN z;hFWzZO8l$IgQJI)YnxQJrBI3u&3>Ns8meV;;ElaZZ7M+W)`#3V&Z|838#vczDlmU z)$;dx?wr{_Z)x2x@_)u#ed_P@yOs3^;}sTlX~HLwV5@wNEb2P?F@Mf2hrW9+bJHee zOf2_F{9IQ!<46~$(_sa{^Cpa2T`TWy+g-i36?wXZK&7OLh4 zu+ONw^iS;Cp7amFN2L7Qx7;-CO$ysOjay=Am-_6}ySm&$Gh+p0SKPRBp!&MzN6tTq zAMM>fJXNUs5%v}{3IWs~ujAh-02#vk@H4 z*S5D>Kl9$LR%6&XJu^r4UgvL_LkWN9*eLL>3G=$2JyG}V?GlgPN@ES@#woYlI3Dy! zo&UK1Xg^bp`l53knA#@9GruJK)UK6l}bn3vPS4eyx-$Tulv{DpIZay&&^Zk0snZn%?9MkiD%%BrT>c<#>*IR)*q8g{FYobP{$kd80R9dZ|6BPqm*YKwmV(o&>^O#)pO**_59yX73&{`H@s24x<_%{k3hv;JGWfc zG4V}4eBC@;Jbdz*jT*mQ%0FtYKfB}Ki>V>WXWc)>AB`9OBmJ14+i`LBKcQV4Kk{wQ zUKX8k-A-m#_C~SGs#n^cma@iA(pWY@UR_3O&U2OJA6?f!`%`=OKf|YZZ?!|!UjG?R z|IprRFMm|7H1|XHv5SB5eRKcVuC)E}an7v6Kl*&**e?8M*wm9Nb!?kl-opw-ZSmRP z3Vt`*6$Wp4{e%1A_1-V_l7DP3>)WW?NZGfUz8AP3o84QRl`v0w|JJRWU8PQ?T7JG_ zkP@_fGtY*1HJ*>x%U2jbTHk8&`lXHOs*3U>yVl&Q-8MZryHIXg_X@7ErRx{|;8`p4-%_gkyx>J;qXe7(gc@xgf^|E1gd*S?VF z*V%HJwQu&L>|V2H)!F^lin^+s)5^AU?zYcN`gybO>$~sUR@bH$<{VjdR@%+u6{EyD zd9wE(n1pU#o;5r2{>Qz?r9W?%eg7|7-sC^Sp$#A9HvcxRvAq!PJnwPb2iw0>%B7r7 zesjN)yeVzl^b=E0F+MxG%j160N>#hmS61;YbwVHQ53O&Pv%Z$IN7m-ny_G9Yu6$X? zKR>klEw@P7g{a%NPnX1b9O;>M>dfssO#c}wH_lq{pF!r2^WwVmc0wQLe7fnqzT()o zJ2$p17XPI-m-qITjWfe+*Suuiu9C;|)b7LGiMQ&`-xpo?Ppcw7VshYgzui|%YCX67 z)c?|PPuu&&Es1psJG#u%Q}}t0xGYT5Dl(Vby6Rcc%KLHo@kRd`e&;{V{(a=s$>4o= zZEauIZ2zO5%a{Im`Qg9mF&~#V?}^WRXnkJ%VB+$-FTd}So4d+AwWwd^+H?c2O?xhC zhRiw}dhB25k8p;SA8q}=`Fvz=|H2)iS0gOmzqNWj%fW@WV{XSg@>KDym)UN_y&-)f z*8=l+oeOPKYZkZue0|@}`cLxJ8umy2ycPM*`!8&XejCQ!eX}%PF#2q9E>GdK z=s&}O-lB{xZ_8aaPd*Z-F;C*{R!_yQgT4p*ZglLqGB?Qbf$=KipT|5!>#ye8+Lj%E zBy#?vojlXNX_xN0E?Bm)c4hRA-?qC=?u8V*RkkdeY&74ax6tfAgKVj-@uR2j+rPfH zc>BduX_l{TZprPLdg&pbYE~vq^wzyG{mhgpt>!LDKhK-KZFs```gK(7mL=b+{_*?k zmS*qkIdxJmbl2sN@pZG*pO;?f`g9{M%0KVOs#Cp^$}3os_C^0-{GfegpYX@&jkiNT zT-n8A$9wUe$?LsgvA%jq*WU4%UE6RvSLoC;i9g*5XJnPEZ}6@?Ex$4^Pw3LU$R`$k zCVIy&FiwuQsNwy)+_b;rpYeyUeXD+nT$9tdyzENOTay*byw}Q3v(Qu8zN>uVw6saL zo>`hNIwsPz+s|=j`>~(Z+25_}w!N&k7T0f$|8?s>!_ss2A5Cws3H{jq;CbI4yI(wY zve$UeY@z3*ae*mW|a_gs1!Y0qx_+6_k-TxC|bz53{qTZJ*V1NPNe%NS^a0iPBH^OwP#s5pn$PiNKflCF>MUe%So5zr!Ywb=E8M zFahS=jQ1PU=H5*1jBG1ddbh~kan|Sct@|ZxqCX1ROI1|e4*jrf`J-4R>FCwGWyg*^ zH97fc=Cj`B+gr{mWIWxv!@0BIM)0R6jY8YC{kQ+I{`&s%CRf|3-}b*+Td(pqZ{ITh z!~QK+=ezc&&FBB&ee|Bd?hns9Y|Mpgr=1U*cBtZ=UZ})TCEt9N^FnNipO1a?;+i8< zy57FgCh7iW`|dxo)eGZXSN`aFkZG1_BDS_d=w)J8_3CY#&$uUVJ^Zufo#CQ<>yu}< z|6TYeXFg*M@8k7v_L{!iYAva^)$`$o9g{Am#c(`eOjWy?YTbTIwX9Efzg;=k`+wSh z{O5c9uv}B|_;9$eN|dnLVvBiaC+6!WmR5IMKI^zMPWNG|ALl_Hx9spGOY7FEe_y@- z^QW7+mEqrY_pPn#{z1KTRE4EZDZag$2{M4jK^_;IR z9y9wN#?CdfWZAfSz19K2FUy18*S`N3RUdmYY}3lm`<+!J&||@(E)9l7T^h`% z_u0Rev)TCJ?9}##Kjtf6hg(fOe)IkNE!Eu`@3!2IU9xAx+B+W>pQ?NHv?Hi|!fmJR z^Zq!0h{~SNW>U{`@V8xjMZ2f3<)p(tSM#?_v+}*v-e)#<-pjUv@|lU55$`RfH)-G8 zr}UrUP{OX5pbGDh58rxKT%YXT`%!ex^*|M`oez~~$^DJk!NAwH$NSiShOOouU-ECx z_WrnT`ohfr3?Jsava&t4UT$_tO0~t?wO96B_hAe-*z~iS%~7{dKz-$kOpB9tuRoRj z)Q$aRy|-7nuHw`C{|xN^B zOhn0-u=gLVU7JIy)79v&Xp>Lk*|L(U3w<$!i`PQ zLYLRLFe`0YC7{IM_Mw2m^r5`aK87Ft@jqH0-H!I>JzJaps5b9#Nb^zdRVrn-Qg?NW z?(ND>*4i^kaJgc~8b4!oT{*tG)mx{W&*tf2NOw3cYbp9fUbLnXwAd?7EVf<#)^)jREl(}~XFn_Zjl7aSc^Do!vg_N;ZLc*;^PjGcsxr_2yzxA@ zmEF6O@$dZOqigo8E3(>tSo&@B<9WSnf0REgna8<*^L3TI*KG3gb*7qD+iSUGlsOyw z>d*2?&$+n3cPG0-`m`UBADEBXiC?!7EU$ccu(QnCSa-or zaJ=pI&hk`g4UI1s|K6^Qyq!Dmb+KpGv-=^Jzm)I4TVD3<^~T_DRUg`WY$P9fe%W^M z>t5!EPp8dF*m$|dw{Y2=^SOo3Q&g91%Q<)Lp?UgSop@`*dwcJFyPCgdo$idOk&81< z$~q{E9{!{GV}Jg~vLmmm(%Vhf+B?p&H#;72#qRWz_+^iFFOFDVDZH%g)WmJprJ{FZ z?x-%^viVe@j7Mz$0%oMa>6zA&N!_3uSGZg*uGd>K!87a8R{P%jg7V*rKQ8Yzx$@z_ z^*x%UE062G**0D1<*qeiiJx`%WE<_uPrZ3&jmYXxcS6s-J%1~I_wTF!!mHNpD+?^}qdvJJD=?Ji1nu-P5>?DoM8bIPuRBsW#Dt6lkef7X8nTiqXP&2#syI5l@E zuUv1P{IdS1dC$c^9)93{F8Bdi4It`6jD#^FOj3KVm0vseY^a6}?&S1B!W@qPwR~Q)Z4isGiuV-{Ahk0-&MljSO1~=)va%>59@i4-O4e%bN`2bz>Zk2%!8kk`u^qx z-Mib(>Jlm17gO+;?|M_6#jES}f?v<`RxI}yGfL{{C1hB=6_BvIpeCT zx!|hfa({b@jJ@XX&N#lizRCZS|K9Icm&APa3wks8=e_%-f9;-~d+qe2!}LEx=RD01 zQ>;>6i#6t^>)cvc_|((n;q4R5>z=y0+g(~4{bTM!@sIx*4(E&hNU}V-W!AN+rOS2B zUNY9vl}LVLp0+D1U6P^{BhmX)iu69 ztlmH3XV3Z--jT4|`*yCf^VOVt|L$ez73Z6X3mM&he@JI`igm$JX`8N%AB>O7X?-mG zcx?WoZx^3lwXt5mZOZ11X_syvRH;whK1XPiw{jj^5$6f^j0d6r8T#x6*Zq^MkTjJw z(~d07e(QGLYt!$#_nVLF9(gG+DPW1A3(vU_0go6aKfCdM-5mDs^RuwGt1oY# z+Vk?L>q*tEm;W_|O*p#yIthHM*-S)(2d!$t-`|{hvW(+s@R#Q&(l` zu8ml^$=m3%&F!3sUCQ~L%u1ai3aX52dDXuqTs1l0SSD~@WV@Z^#vNBoqbtH5aq20| z`t)}B#GsE8BhGBx8Po2OQonCad)MT;D{%(o$|>mIss zvj1!!^ABwS;nei$E_)qmK&T~2WKMU_pv>@>FRnSE+oBWrK(+}m+(*X7gXoV;H0XdJSia8mR3 zvFsO7pZPoG1@&$0mshAC*_Ejxn`)E9-%jP7 zP^5kN#ojCXkGy}KasR{iM`E7qUBAc0f2a$?e(GA+t4rLk%eU`rv+huls8||s>hOi; zgDbP;kJ=etu;E|E>wcu_+3bgZZB8hOS1r9$c2Qhm!tOhblNcXcKDfZTqr}(ho^{5f z4O=^PXYApcutL!AgsPieP*GLYRq6SxFScvvwEJ!AN$u07+P{oKN~hbMnE z#)W@us?EB6Y(LM|xBADXzh3YvPI2=_Dci=*Ts@^pl20<_Tw_1A-IyW$EqUX)?AtFG zCmfdD<0;~?@koQ#A|6)e1qC*AQZ_9CaU9%l~| z^LqBR#lNN>Ixksq{^0aDsgL#DU%b1yU$6Lg|33rA^Y&+#dCzvp@7=2UK3)5njJ5|~ znRtNF!RglD_FVbOtNq~KUo+ptJoW1(*hoJY4&~J+k0~U)D9~7j&lFmlk~5GmT~Y=093LYUcCQ2xl(% zyZ38!>#Rqg-u?49lJ@A?qw;x~OiWJ9D(jq{JV;1+Z2II^`Eh@ltXtykCQ~luZrLyP zdYo{+IcVTt?hX$drZpW6h}|iEm+gW~i-v7scX+_SDGF$z__XOgF&zMGgKAZRQ z&+S$NZIu%nSL?{X=Gu3u?lp5`#vh-L$9wmw-g+S?G}+waf(Llcmo)jy-Rkas0`jRqp3JmX@AA zf9d)k;Xg~}pFeq{?)B5x)pfdch4tmL_OHDdZ&joB<9h8!^W(=4+9_JucYWb4cG!94 z;jV2T_AOj?%Tvgk$6vs_=i_~s*F{Ub1-TnfJ!~zF-;~?BZTbhlQ>RYd)AqKmjgM@T zt`JZuRC+btF*Kxrfq{XYfopZf{Q8Sq{xgX1mrZxiKlI|86bVCtGWp$ha`NI8uMh5e zdTsG`zYlk|n|Ch#DJgvSr1(^kqu&dkE?{A2W#(4nz$q(SK6`8?+w5H-xW2Y`D?Z{^37iRW4+hM`p#1CSh1sD z@Qi|+h)mi3U6SM+h~l!Yfb_$t<${aF0CUi?PI_v3L&n|5Dc+C%_k zifkK_EW}r-KlYw4^+IZG+d}J_Hv zMaXDG)rtL`R~paz$g7x{7W0-qu}xR9mL}=&WC~a^B7^>H1B!2OEPtpWG2mX3pK* zz_8Yy?N8D5JOn(C*2 zwXXkq$e;J+SBkFXKl#tVp6{#sG;Oz(JVl3ngm?ep{;~B@yws28fJ-*%s(sy$INtW& z|D*Ju;fDD%bF&Q98y>r6g?IfjoXQp<8m^MU(-?QM-ul*!JH9e$;eBSMj#oP*nWnle z_WJZAsJ?9b*J}^fy|T`)KX&@vrQ`ikHLO?U9~^JrBdWKVw{7Xe=pT6{ym1n}hmLI9 zES#3s@#g;T2^&u|wiVruNDZqa{+8*&QO+#y%j@4H*h;-8&c zko$C}&at%4cPjm7bdI!0CSTj?xih3cwEE}%qW=tY%HFQsvTO3D<`-Y%=KbBTneqE& zZSI8{w;$1KD;7t8c;E5I@==A2^`pIV*Ipfnk9|2mcl*4158R(?JE?ZG-RJRJBhM8+ z;lRWg+2?O&t4&_$zT}7f(Z!D2c751WCv;8aipjxR-KUErJ*Jrn9aEZbE$$NWA2 z8N$lCrhjYMbKP`*=RM93rOny5leb$f1aX$(l%6IQm^i4mqPvmlqqUf>JV!e;O-?F7m<6N5gUhLk@N!HUc zLZx&18XhyR)7H4<_1S^Z=4 zq%W78Rr0lWope3R)Arg7y`zWL?sk;kA>iHQpkLB)Mzr4X`y1;RrymfsGr%rh4p{&q+Cv4vL zW8bT`K2KZo*WNbt%--7?Yu7GcdinCT`^DROB9kK*SKM`0Zd$W^hw0;Z?)~Yr_cQ!x zKD^zkzsqg+k_vyn9hYiM6L!_#e6nTx%*E$#seX<>KQTz-aad>%E4x<2v%PKK%C>Eu zm*T8^9I~hO(LU$Db8ONd-TagJ;neSteb@CCeLUA#zA}qF%1ve*0g ztv_Mc4u?&4D%r5D?QVoi(}mdlyp)+Uem>S!mv-O|v(Jnd&6>ycL;BI(c-P&z_Q72@ zmD;{K=;owMRWjyjGHN`{)03KgZl6HruVc#}`FFixvZiu)<{#~($-#kS2N+8Y;J){KPR8>oU&U`Y3hTVt^Xy4kAx_SW*>{&zCvn%IU&L9Upkdl;YI`s6M=$@}4wnfH4h?YVXA^RZp~-)#MG zI_mNs$2GC>$CvwDH|Je(SLdzXrR3eSo}_pvay&U+^pa=xhIapCQd( zqGH=M(+yW`tb2n_DL-3#bnn^i(~H?>O*$&AyyNCnhSe&~*}FEVhc9{Z=jvbK`0Mko z-@7!qrhE0?z4hyNZ?BK|_~K*z?LV^l4^_9fUEJDnZhNtXn_%Wik)BtpJXG%<@!)#W zdt@Htx>GZ`XD0UEESnbK^{dc->DKG>f7VZb=9T4Xx^@5h^Pjjh7EN??6$xN;6$xOU zSyTQa{*ivS6yNM68T-^Pf8pkMcjVX9c`KKAu84j$QR8Rs(};JuH{5sgoRa5b2xIkT zK2sC$ciNhL#ua+M?jQMe<=Z`z-4|6Z%Ex}Vdv5X4o0YC}Cz#Ebed!tK{bb3yhdZC} zFut4mX#SRa)3g3bMxXflUgT!Qyo`B=5ATX^`#ST@toMNhJ}$?FHKu1u&pvxsYSC_k z18NIz>Ta*MUAuhtt-E=d(mQo?^LJOjezHA3w(m!UeduN3Lt<6(9nUX+>6QF;|90H5 zTP?TdFp0VJu2UO)l$$W2Lz>k}c&WlyZAD^dkZF#-ax{d4|TQdHbY-4->o5@$teQVbv%R~Pe7R{8r z_tazd_nU^5HA&n4GfY1j_wM(vE&HQ?72AD(-aA!Rbn3Kuv;H&q@16L{{6E8{_*VZT z^Lc*k_PSE~SIbvDeD}3Hldm7ymaA?(^PN-e(?;p_S^_B(cC+}dGwLzhC>nh*e$)Lw zir@c9?vzvi9eJ^S)4%vPf9I9oc)MQV^|QIV|J~fZ@4mjwyA#jX=KOK|aQN~1Z|8e* zx2|>e_^98hw&|j&cD~drq1#cM33e{VnXJ?9sK0x1c-<$-DN`lx80{0TyA&_DC-~8N zF6Z?}AC_M-ZC?I!;>&H~26q(pY~C}^#`=!czqe~TEQ9xWA3HnWtniv(_V4}byW&>= zx^!&vrC|Tf`{$JI{!;bx&w0Ne(I2EtWIf?e-z(rXY^72fUWz(LqAp@+U0Y7+onvn z@4g?Fe2j8jzDLboOFMXl&?bh838%t)Dr;ZsKdhU%)4^96sj{bz96e5K^c zN42f1GjC0qRkK#-=gu9vHmR{9dwTe;1vfA#FRkqPKF>-{{)2ei^7%)%?+WiQ?M?c3 z*#FXky>3XkV+OPQY%lC(+1+DVs;wy~wAA zGnM2b>kYdmMeud0eO!O|{14wf;SbJwJrDTwi6SvRX|2D4Z5BX?+@U79~h`qCe_if#C#pM3>I|8XY`)+-!u6SBx_u0s0$@U`8 z_c7DoMn3pm8}5EN+V5IT;L?iI@%y%Z_&4d7c~_~eoy0tMU6p6|?mbj07SXm@vPsA# zL8gaGIxp+i-SvBK-S6Eq`FfGE$f9c})%JOP|D*qE%Z*od>VH%}`X80PS+e@R#Eqy5 zwnxvdJ$mfk?yUCrh>^;?sC&}AtNK1R)fYrCIV4_N^ke34yPFlSV=k4-8ofGx_-(Gq zWU{KWpXXo^ZvN}SYABip8k){)i0;tQ~dOY+iZIN z^4i&v8EYQJtKCZz54(N*)~%PzE6R;_#_I@#e5&RZKmJeoN9@Pwu&p0OuRi@ZcU8=Z ziLtN5%@@2UI#-IbrVc=~}Sf7bu{ zd-vDk+a=16IrphmEd8p;BFEiz>zu5dg1~8$A7wkIOehk6*2%PuFRUZx`-S;8=P`~8sUrFs(O4f>f`gJq1oLQ&9db#Y+z@w=_UIIncDrXhN zEmy3)yZ^+~eY}74KK36zFIaIn;3I$c7JI?yioQRuHbuXjCpX)#LeD4Q@Zr609vt7q z`qS%qp?aBJ)aFP0Ew`@s)QKN___q4!v;Pc7w#{m(TDrXB@#W+e*FxUww{$zVhuu2W z7-S~h_LcY4v=g8CZ{801DAxTkw8pZ&dVSo>UDI2GBhs#KZC+_|dFi2J`btS}pSTxn zn*5|`)xL0!Jbo!bvYDKZ zWvj!i)@@!D|D*8v=K0?_nOu1{-CcZS#Y7iIW6Kjs#k-r%e=E4GYbRvh()*>n>NNLC z=_BT08kbD2o9h-A|K8xHILR&H!fJ+qw|DRD^lzJ`w)La>H}?-AZJ#5qn|@4|d3FBL z>9MSgtt`5@o;$G85Gtb25=>eFpkgHFAB(e0_b zRa5AZq;%5Znm3KP_BE%TET1iO;%%2y%sKvC@l)4+Pl~$zENqoly#4$y_x@$d-IYIZ z`-YABgM997i_;4izm)oL?JK_)+cqcVY2Mw{)e%OIEG89maKyarQ&_OA;+xcm_)a;a zfA>E4AFh{)cgmS+ef(s!^?k_KtoTRu zwTsu^&EGLsZ?Ao1-S*$uZ&F?HtVz4&rcS2TbL&_~`L&*bXRV@FWJ+gE?s;s&cJK&O28PzddHrv#^MAhn&(Jsj!vgM*(0BxQ@o&G||Em5oJYN1sgXyYh zI1(4xV1})(gV~~xf^*ZkqRj3rcx-Xh6TBXZ=koq17W==xSO3>Cb#H`K`JZ<2e|w+& zXSgtF`wHeMKPKltl-|pA$zJO2jhio>gEmcL-@o^_lJtq^p1KaTr>A`mG?@9b;}~CI zgw^uH_uK9Z)VMy{9T|2$@cPU(ew%swRw~6tc?)g6<#kAtIr@xfXJmNt)Z=FyB%ao1 z-{*<0xPBl`;(|VBbguu=sMy83{yyQ?j6HlOUHMLU^U=)m57kHH1#L#9 zX>QWt*!d2d#e>ZjDpsyv%obf;AehAJ_PFEvk5$R(lIEAf;_@zB|9Ji1-OpP+Ew<)A z-Th_H{?ng-{!J|Y;C{&bo>2GU>1DfmpSGAg{gUopoAxdGM;^Q9q>szYIUc02MKUr> zoFk+ye*8ZJ`yTenhrH6hAI1Nu6lL5heZrYDZPvf_&$b@Eb1Z36SW~@Gns82{%F=BD z(M1iuHQyKf@%uP!;fJWZd-T`d$(k>iYm>Ml^Pa@z$5+#?y!|R&aZ+FQ$Y+jOukKmI z&hXdZ;X2}|(tB6`NPcIW-~~J8Kc?6CkF0e%94vKhTJo-|e>`EbY3&_F%ZuhP)y7>vB;`82^Vvrp zF>kT;rae*F=hhvlou^*jXE5`Y!KIxiB%ezjkoal+m~Xv(&p!i`TlT$QUwo0C|7J&S ze|wEY$%1^EWWpLk@1=bW%JdsViN`%3QDeu>yyKjug5{t)|Y zX2$h%(}knY%_`VcAs|q*tbR);SMN?e>Gi7Cr#{>~y7cqx`e*N-mS|1u_Agbf*LFMo zCEnJ5Me+~n2Q&T@)x_+wX^#3Gxc~U=b#^E8dg>x~ip@)(psxAp=f1c<^`hR%AL0*L zrFZFX{q4=Yx_bBZY0Zq*>q}Y>eMxR&TszN$9d;p+~Y}}Crv+v&0hVZ>-Hn2 zti#9bG$!ufe&KIlY|+cN9QmTBlY&neiQN2__UT)4KG%&q3;K6!B~4kW`K3JU%cgq$ zPg;IgQ~6fi`e(oL`9Gc?f)Ca+WPhu=*1bH=b!B9P-PCN;duuDpbl*xYPG-qAS+`Qc z(7iQ)>Fr~NId6WrKit>+Al5x%&qc-ETdcMoj*Yo(wzyb#)v0sm-iz)&anT`i?#w+) zV~;%Dl)Ze>s`tM>U;lgs>vRX`q8V(MPyD#m{KNNoe(XPN^}PG*UU}E+i-d1xY|8qz z=+<=O9HBLVyIaex1yVENv zo_Rg9swz*MIk?2It?$l~_Oouft)kwKWA`mhejJ;9=$GH(ht=nWDo!8S#kOx(aqNkE z3YVX>+|p**7I7j&G8k$m#ojM<<&W8OE%>o_&5f7;bUr+pFJ!9Hb? Z=Pl;TXbo;$G?WCZL>>uIu(iN zPr2CsVAV=Lai!2XBEN2b@x6UI_FH|GUzP9CxAmXB{*^`N_1AoeKX{+Jf<0o#)h)BL zf0}A8_^p3yt8d)rz={QTcc1FGb6S0>+#A`6Z-X2jH^#25+5cPn$H}}u#cL|+{Ds!{ zufObXK0QwT(x1rERX08E)m@kzdHVNWs|U}OBsT31-IdFrG*k+|$mbeC8Iu-}Rx^m8X8=`GYs_9-Z3OyCUQKBhGb-pOZIdsN5{n zJyf_eX;;9fbcV_tPX-}t`J3;LNyRtrlm6rXao_fJ(PtkV{m6Q5x!C^fte|VJM7M79 zp1mWvzy95mo!;%z+{`;B_!VWUR!z+J_-FR?&(B@|8T4aw%vImqy1ReZpZi_g{r;Nl zUsRL+;rU_fe*Vnh8kZ|MTX@Zm#j70;+I3y!lAL*oMw0iGY3hfUg-))M;StETK0crQ zpYZ9$ncMHFe=0f8dowyZbHi=V^>=P$3OPyW?lFp9^D^IPT54#d;N}UI55oC>dt_ak zWqwq?Jx=NBAIIfCvJV~hyz=&XROnS@ocbVn8#WSO{YbPoQZn$ebO*uo+ zJvQ#ot)Df~KW}EfUOR1F$)>DcD%bpo~b5ie?52B^s-%T%a`)o`I;RM zt*$-75gvi}Tv2lby;mwef^ec86l`(}N;zh2`< z>_h&RUFN-8U)u1OFTMMt?|ALnuXC4exxRMknn{)YT+v(BZOgjEd(7gR!5Je*rM*sH zQz_jWo3wDPcUk=6+wZPle{yF_``N1c*uDI#7yd|n$SZz0PUF&jpO%cOzen%>+sOLn zypHjn$A1-PpGn>t!N1F3@dnYOKipk@*rt9sd?dV1_M`3U3a@YeKW6=ybpGgfp_4nA z7Vxj^o~)G<(8Vw%S6<_Q#hUlGn2%hu7v7(q-?^b8D6@3i<+xn^pzT}Q{EbV57Vg>3 z6(VjI_w}bE?;M_IJKi_VQ~vPqqyN$Q;#qQ9AHHpVz`rpw@J_|lq_*>()1Da@L{EAv zm(=F1F>`zEivt$Q8EtPR?dGh0_p<)o?Y!2q+RFL&YjdC9U-|MLmCog+Z z^>+FngF}+56j>_my;19}VD!min}*dqk1TFyo>WMgRo-bQ`GfnA9A~@hzG>fm zF63`r&8c3Qw|sZ?=|&Upe38$-&+Mn2l`(ucgJEH?mc@GJAL)x<+Dq(Dv1h;YBR*#H zm31YuYVVE*TlqdIH*)9ch^lsL4E(;QYa(ys!wF1WCw4wx_L2RF_WI8HP3A{5^YRv7 z*;+1t^IEu>d0@<~)74rBa)Yi#JnC0en`Ls4b&^Y|-tBVPxj&!gUfq}V>i!3-_~jLz zQJP=(pZxltq51mx?fXDi(7pWlZt+9T`Jwauw_GTl@wGGe?LoGWi@RL=)DjM7v6mJZ zl;<$ZJloB6_0fBRw7o|B@m%r4a$G->mu6i&|8TE&{KoB@E|*kaRN1`dWahIie81~HspLQnZ*kqkl=|8E zXL9-(zU)PTPuDH1-ZR>F|M2$Zy1V`nlOMJp&J})CH*IJf_>gLvL zuS&k9Hfx$Wr)0NyUBR3MKhvb?cZTu2H*uK?ER4fiOU+$XSUF#jQ zYuEP4Z>!3de3$I<mS(<+Z#%|d!s$K?VlVpi+|JK)gf2kn~Hbw-rlpK_spisz;_34%3W3Avk<*? z{q%o^P5XPMy+3Ysy#2{XtslSsntZM-eN?M|bb8dAcb$H@@W*)LRwjnOxa^@n3%a`~{cxx{L4q`g_`c2IZrr^6Y<< zuI%HAc^UO=N8*q2#5Mj#ckg8jXByn{7k#{;&w9o!F~Oq-sXm7CN)NVO$jbFUG@o7k zp>6NCm5(->H!a#D)!N_rRbTp^*M&0&j>+)`c`w#V5%W=5AaKU;v&?5X?jLg>@;Be# zxWDm_>!YvRqHFd2w`{J^_sKfDYW=%;MwbLus?Sqlz0t7W*pNXuE?n_q$z17&-@9{a z>K}T1sa`LX9W`(5qgb*Pdg-ReMXQTE>L1;mJm-DKJi{O159_6Wq&+|OPv^t4=11A0vwY=v)1$jC#rW7P zuXtD~VyPg)yJg}uhTh{g@o#=V+N=F={+rJaSKk-(yQMd^e)XN`m)@p(8and6YP!vr za9FY;<7tbA0xL^8L%8{GzaR6xf0RCY>wL6I|Hz3w)*rR>`IkuLx5#m4PhR(CR-|H! z!;7?fBe#?@JZpBIv*}nOqAHK}Z^j%fl`-NyaZDvy@@xLkI2jXG5sL2 z?UDT!FPp%1x1X0!&rSuFHWxSU>umdu9Dz+2`7_IZ-#)K5;AN z+#4Bp>*|wjM>d^Sy=;H8Yx7(8+XV}qF0!Ue{5*Ac{oR^(^Oydasa~;j-RXalf7V=I z<$l3R{P5)^e|$dr_oQ7--!naO+vZJeN18T99%d+RFc&!UUVCE>pIP5c+t@n+F;Tvv zm)uo1FVEx3ob%{!%TC4ZBO0H}7`4RkZFaLp{3S*z` z`!0Q9H=3_^JY-kXL!L=n)&)IP6;{+cQ?$;2kBwdQN&TkzN8*`(EIwv0QSzSa$Lzzu zHp`2cz7d;c`$stK`Wv&rpDQMA{Cn?f${n`52YR0Ex-b97;fkHYAF0~#hq-INcU=$J z_3wPWhqcvX7O>bvRv*&a_e3t!Sl-uG3^@nuV9ysj2dDGSKzqZbmy{dawtNoO*yqD^x zyZhPQdD+kU+m;{NHlI_l#&um>4a=4f&yV<~)rx%&n(xh}dcR!!=E3wG?HZGiMs?SoPeR%plhKtMR zy*g%{8&$nm?VM-IO#S>b#wX6&Y+}z;VO%yvydd&m{*10KtA6e#eb4ONxl8LNWq;`2 z|2Tibx^-)Bf1NhJ{oaj0S5Z~;4Tus`t`+RfJl+@=`|SX>V!-|%el?!|8Juw$u=L-)#Qo2Y?E51BshER$H8=iW-TIyLvq1Xfv-|_&MgKFPx^(eB z;rT-K7Y?ra&+s5;{-03!*UzN?)p);m{I@*(Kg0ah@6vD0&+-p+zZHHZ{Am4$IX@=< zYvT?7&%m7WpW)Gp{|qh9kIesKkhTAZwEBMrQTJaz+x|1m7y5mne&yBw49{Qvp80Lp zskN8p$J_j2`gm{S%8Iu4K3DRe?`vFp=0sl1Zf&-}!*T`ACEwOBRJM%zy=Jx1^4W=D zT9Y!ZSKBK;DVAaRbjSJdmCUW_rO&LoG}gy_#x@$mmWjMNbB(O^Kj-fM3~`_8UwB4e zVLbcC`Qx|QA1)u-F7<7t?3oYlI%XfEBi*0*7usA}6L3^&5@Y(ri*6n=PbL^;OZ zzsCvNS!|!-rgGt0#lq?amEb9rlA^JNdp%2kPDz~d;CkPW{KNljKm2Dnw7%n7{D;mT ztG*o%%hav0eApWtv*n7ZwM5+Y(`D11WpA1nIlY+Ww@c5eihnQ8`L6I?d+G9rKXqT9 zF5Wl6^Bm`!d;72F@4g;* zJ$U|nAus$bKO=`*~zl{;_oii-P(r&-lL@>Jb=_uSfincyXPLT>JQ%&cWx9yUUZ zPptcSe2Tgl_NWgd5y&q}Me*1|B8Q)mr^os3wU}Mvi!nUdL$vclr z*j#^e|HJEmYc-}Hw#Q0|Zh9$JC-G5r=BFpUi?&aD9niY4&%i@xtxVCwE4jBUl?&Lj z?mbz4a+UAxce%CSyr1WuSy_Ft-)_3se}>xmcmHP8=JQ;=PVi zUF~-CH~y2m@<;hY`hi_?n;ExW{c@M@)VX)eN++M5mEl;od&*P`g`Ot&h3oFkd9+wv zgmWy^Zr?0bsyPI)L@HMOJ%$ZM_;ZHs)?4q5a#r<ob4$-RMqS(Zaj{po_vJsnOMX}#KI;AG z^i2J&)@+wlb}%j}+uCMxcc#kJeW^3nc+Zq{^H{lu^~399zwCSd$-SIDPk0`${9&o@ z9r^3F9^1Foz@PX2ty?!2^`;ul`T0u1NHh3mTZG2`yUYIR|KR>wwCUc-%z|l?Gjsnl zu*EOx{S~`6H~F58qCBt3gaV_U*;};UB!O*w64j)@7CjUC%ip#Zq8kM zDDV8Oh<#FiaGpr9j^(DXO z7u&=Sv-qW#w?El*dFxuw>E4%1W?Xz%8mXt}6@6k=&DPI)oI*8c8{~EV2wDASX#39~ zxW-QP#rE=T)533sUhRFdtMuRWm5Wr*PCZj`SENz)!W^RwDP=)xmI(1ywExlO7kq8M z+5GTb))LEirtg{W-*hirVlzLb*3DWiJF})UW`SHz^pf81T9bmG#Vua?!~LPY@E@%o zg^%~9oj!Q^?AlkedS=a={@~}=hysXjlR0b7?meLYeYWK4us!cL z-9D=qUHg6doo%n?{=J>|{Zrl7>!(hK-(TDEgo z8V{@6+C;-;h23vW}9azm&BYf^`837c(w}bhtu2llzfVNwaaebwZnehehsEa z4&~I`mYtrpz_RE_?m?N244?PQ+R4ARdfsaz`{=hn=e>%3+`M-0{r6t@@;1m?drGC+ zmYq$ZQ=fHuExDGp&ivE*LpxagC;iy`&Hu;J{13%Gm$$?VW|-evyRJET>0Ghb9+hW= zx{vJKm0i8KnoDal%iVoOk+~O`I9K-F`(A%Ie&4s~+}e95%Bsu$GwAQSUS<2H)^O3E zxJ#zz<>M-Xq`iNv%RPHY^~*|$Z969MYoB*HJe}3%$EH|Ozq+6srWcMrlvUCfyHhbY z=lbumYKVTXANy&80>E8CE5@WnW!Wr|9#aLHOYZ?U%W_SE9JG#cxgB z-M{(vX5Xjoe3y);YZ_~Q>9{C6>396R-fer<)}Ida_TD#t+Der_@AJ?5&D}`B0drq= zpH8gIwB0SVTFWG*Z-HRf&-@4M2i6Py@IMyEo*RBR=!L-#bN3yW*L{sFHQCu6y~E&k z)M4E%-r6&gKj}6blytmLFx#%wfp2kyZDuy_&P7_xjhr*)MA4ALmPE*7MgW zudbLJHFNEkwR(5o<;e={&JDa6Qgp_Tfq9aL=<$mB$5Z4KFYWPo`H){Ys-oH9lkff3 zcM9A7T}+)>?(}j~S?kne?`rOuS>!JDSo6LsPWEH?G5O|s)*pVqF#J&-VQzGIiR#ra z#mDO=cuk5vbEe^^x2BIsLL}ppaMKU{2Y!MhspBgiY{h9qhr_qt^Ap{Ytini+rK{Dy`O$@uEO7Cd(7(O zE7FfX_Po~Hw>#Nv#kPGR@)?`Av@qLT<~Pq%zOysp=8fl1!=B5!A6+k+`H%NQO`Xz} zEghzFjs5m+o%i7EcC*5np}x1;fn}-@5bG>b88m6SK;&%VpL*&v`05hoAR8nj&gp@j9-asp9DVw0iL$R^OfWbO(PF zJ#Z=KQig8S2h~f5MZSmUrqAd*p56YL^G>qMz4FO(qrTsYmC8OJn))|u`~J*mx8K&w zpFaPa@t?td;nwpUFaD`rG4X!ubuwSCBzkl0^4+_WB+vMZ&)gzocvHD?hfLmyg9q2n zdTdkr`0EGO=glT3zMSrjdws-ePn1pIfrmcYPd#-eZ>w6Q;&c4Nx^v4HZ@d0i{AK=2 z&UnAvxBu$b9_rFqkO*6GX!>}*`27mIL)_i`oUiNjr$w4c$MW3jRF*hdW^R0_v98B| zYr*yXTjou(;aysB*!{!wu08$_U!RsV<$Q0l+QTqWBxTLKf@v8iQ?(u}adP|V$0%{` z{;?f?%O5r_iTPtR`_Z)CImK;w&$i9WJA@CJ-^vNP9aXvZNoL{f?%9RS!a7BMGpz%g{+!>N9&Mdpns?jw z&t6;ovWZ#O!*8#@fBF1Bxof-a7F{guwinE}Cbe#^#m>v`*FN60eWoLm$6aS-p3f%~ zKF5_lioYfA@L_+io{4m?e#DL~8+UKM{MEMZoi)x_M+4(UytfwO+TeiVK{ zUhwsQ27x`%*_jV_u3z)Vf9;K|2^9-VUHWeOY!6J@zR@=)HGGnj$rD4-&SNQ07au&g zt2$c0?LWh+ztY!pt=@bJ|5jdo`_F%d&~4w+cisFGy=CW*o%V-+ZjsG8_Os+?sfDTc zjD^wKI~G?=aunwDX=$6wepae)hq`FC!;jMs_{Fj=WLBy7)tIk&RXjan&6dA$_bQiY zES#gIJgICY%Yy@XfBlia_U9xPtYw3T6hUNMDUi?ZvJYVpS>SNjP zz{@85A{o1$EnT+l^lMx5fVM~3Vdo6Bl!e+A`e*RDmbkRCOm47Lx!zcJK2G3cd$WzF z-LzYF+ZWW>Y`W|tW|iN$&gAyYx9e=)v}~DncIK@+YH$87?S7tSeSP=6TWh~gJ-S(F zwMj1cM2F71&afe?37>tOD}V5NP{h4E{*{@_GBSl@ z?jQNQA$pEwC$pEK;hj5dCzFap{ojf%iLKB+uUooWIW@`iOrs$Fr$X znNPP>mp}BVzsJPP!s@tOTd!0$=;`)Z*Z;9EJ+ki2y?5Wg{#*a5R`P+q^vn5zf0UP3 z^t)&Jwtf-|+0k=c$8SxL$CLPnC8eo}H&(9k?~oUJYbRULedt!~x8}9+&g<_VUUq!t zjicOAx6W$k_V}FF+HjKno%Wovb^Z^bE3vNS_%S>7-vy)iTYAg7zoqQn zTbVxj{1krklaVv#F-}fAz}o8i+w1V7wXS=3`P=qzuKBUE=`Cx8W#X~Sn;~5<&rJTZYyV009n&^UuQuB+U2{8H*1Uh8$-xizBb#KC z58e9pLMSFW>g1z}u&Zknrd5hrT)lEm-QIfDqzxM+pFCDuN5XM(*{An7KCN z!^{5+96^tdMCCr6daeI)Zd9(Bhsgw$F3)bwsmpnGl-`udTjQYE&f@;j`a$2u<$s(u zedKTc(*M?StMszn`_sA(CWve6xUuc(yvlr}BhS97>$~H&#E-LP=mt#>yZ$u)b#~^y z71Pch-&FtnN6q`o3*YTZC+3*B)h6?N{MD}2u8Pj$*?2s`HhbcQeM-FbkJ7tpRQYCF z9bFI|dD?76Ol6R-McF5Y*E<6P&u>{BTfNaGjVq>c!s?#+d6~BBg6Yh-t|Yb zeak#dY?BXPkJ4LmEB0>Rb{S2pQ%YSijUt=$E=^XHRQSrm6Yl(IZQ0|)RkBC_uIpU! zqwA6FLX)@JCx5#vnP3-h<#8d|Qgsdwb6ntWJN>S=dtyK8#S8pkj`=Wk+0J!ln^Q|y z-hFvl`B-jl!lx74cF&yVbLj1KMkXmgCs{c>e0+FHTAa*EOA+q}-LJ0Xzq+*R_xl&zk0w>CT|f2ie&hM&Z_O9^ zFS>m?|KWbYm>*radH#I;T*pPXe`|_&=bZZ}&A7WjOz@fb=WWT0g^zkKsk>Y!e(hZM zw_N)O?s_H1qLe0+e`ltymphrfgUx4}x#6DpT|x{Y8x=j09}Ct9e$0{M-`I5f;hSIV z%?m&F+&WXo`r+8p%;m4HZSzgvXtR4qTA#&JbJeGbvWF)0Z}4!qQKf7Dt?GLE5Amgk zE0;;ov_g;0IF1O-o%~`X|#ihN| zwfs~H6jhAooY?D@y>0!x^)-@@eBB?;Kito8_r;{hSb34?INzty4o%07oqM}pIlF%| zzoKeMn@FPkiIgdCADe#Bwa$2dcm34GzqV<=+G{Vp|Dyi+6@jiIZRlHO6Svu*#3GfY4T{yeAjpJAe(@*nk`6aO=uIMu)M^V9Np zZk^tXAGwb%b6j=LdHGD;1@mtBng8rC5@|lixlU}`9_vSGAHJ?zFXWVx>vZ8Zr z{>rVY=n7qAi`P&({iq|>?EZ@ET+4>)HQfQJF(zr-bFc)dDX=|*TU8>@LW|z1O*cxAs(pZG9wLe`w>2X<=p80%O)Ly%YH8VM>dP z@yR(Ga+fRoy0S6y&dne4hqp)NO@8=u&*{jNoznx`?wop~l9PN!adzWv({u4P?rjgU z#LI4}m{r|d%`KWk<$_F`Av z_T^c#@|tbeeB16_^vKuwtPn%-_P-Ig&)3yWJ+Jk{^0#F1(i4wvZN91z?4NPFRQLX@ zy=SlL_}H$gjD8m6c{lUD@4CIS{!OfYBymMI@ZpZjTeQ7{iw>z@PTnq@kj~hXvO(hO z`xw;{Ii+h`d?Qt>Wrb?(K6(1|noPe}G@;1j_QvXYJJ;3isX4Sn>ygYtz0}Y3bCQ3` z&EpbU?&?>Cd*x4+M|*o&Q_4|6j}0uRit#;iv91?0-^i|Mk^>hF?PAR}KCi zjNev}{--PXpQilekN$sp<}XrN)TRBde*(54D!eBja(-O=NcQ?+f0q9Y885E|yplTm z_V2vO%cEm+AFE-@IM@x=!}v8#$$qxpUv`ef088>W+PvOVYM%Ys{_L z+qJ>;NRKy*^^DV3H!(1axM#-K&6^edye|7^bX;@w@$G-?A8y_A&i5nx@i>`}ZjDPb z>r8)mu6lj_*0co|U&aYteN%G!cK5M9eZzHuQ4bgN3QBIebW)o67-It85qT+{g&)*< z*Hk1QxTk!5%kjQP>KiV;U8^|R`?qmr&`~X48!t)ONG7+KNxfSH7_~nhe-PS!EdGc4 zx~H*{TYb4IQ@6wmyg0e~hR3F5CxxQZDo&?!IcqQ03_Qol-uqGg&0PMDl78NZ+5McV z`=+K=W~xoqZ&9A?&R?i>A}@DM&4&qoI~1%XE7DG=OiJ9T@-saC@}DpDtrM=Um;TQX z^`F7F&fH{A_;1&&vU}=3lI2@%#8>^;e0ZM0wrL;r*Gio;pJu;l%f&TL)jPZ_7$f%_ zJ$AWOb%*pn#lH);`1Adk^`F6evg>udHzuoRN9!iXZfz4zUK$gVW9Vv_>C#iE3<|Tef_^vx5}1(bPoDhBJjiR`R}B6_kIWZ3;b}?_4}ySW${?;($i(m45^)V;hs+0 zB9hg%)vQ`O{c!oum!G!&d7szjJ^SXef7*5S&woU`RsC_9XTQ+ac-G8$!XL%9*Dl$* z{9EtWSy4-uE{f56@g#j(Ng4y2&WbgUGkvsPp6bW?LVtI^mH#kN+W1?% zdj8En9X0#+?s#|i`hw$rRo7WRTz@dXdk@R>DAnuTHkPY@Xgzx$xMR}M8ea*-TL+3^!rPG-M0&+ z{(Sk*a9jTFnm^xOWmepOwD)^=onVdU&WkD^`47(*|KWBnd*<7`2OFook`%jfUcmn2 z=e&<63{MGFC~G;ckN&Xq&8_LJTdGe_|E#B{^Ke@D0qL0&gbFNV=3RYK&u^c`EzkPm zzEH`@bz5fS?8)UiXgj@ai^(P3oGH;JyiZ+JCQswtt>byFs&C%(56>O1@6ms#EvtRR z>TKKY>Wn6qy_df{-{{sEqLtUXZ1(d5Pn#J$ABHbAO+B;!aDHt5uS(9lpX}~j|D*fU zc-f*ZEiY_EBD$De#gFa3J?>VxKRWYIXw%g#_52Yp#l&{CtzDV*OZVHdrRfu|hWwp5 zF;_XXU$@XYhxJOXh3U$V$&a4**vbC!`mncm!p3VQwdH}EH#K`SAD;fcAbILl)tOsO zC)_#`kUzog1-Dj$%!I7_N8@jP`_FKE*YnPGU+P7iAHKF1dvP(tPHFpW@82=g#Wv2| zwsp_EERNKhg<5&3Qwu`goO*KWz{PZx2jF`+0(Z_PW^R%yKnBIhlzhzY)${M zT5jI?h4qKb)jMw=R`!fY`*$@|Xzz;s#+CEpr{!H-{Ateyr$4vP-(4Saq0TbHPBEWL zcki;r@_$rjwJlyARk`t)%}$-3N5N_`5B+c6&y-Zxo?v<0Q2x#Hf_Y|rFMiC8K3G^W zbx*8(`+tVb`|C`_g8cIK+>xlcmf2Dl_V)Svw?8xYk#-`1a($}EKkd4Se=2wG*(*9p z_2af}{~4O~FHir+y}P#j(ERRi{~5${{FX1PD2}@PMRM`yoYG$BH~UtAU$U-jQa>7$vo($1ns#0A~nylH&6qhpHB zQ9rlG*EZ?eF}>pdkbL+a*YwD?!$BX`{xP>=ThHV6Fey5+bF;BaS^nP>5l**p8fLM{-^G~deOh7`=NKC@o@#vj=a^N-9G zx^`6etFNtUJ+FEDR$jkH&pwx3-yVIslBH}_G{>g1k};oZwXgnDuF<`=CvfeL@*{OJ zA8q7hKgRWEmnP`1`*MEU?eqBs%Acn$6guLZ-=-eM->Q8g!DC%;#NT-}9`kwL+R0Sp zA1}M5-_+M-;(cSiiT$=LVP$QdOGgWzFLGDuI&Qsa0#gyI)2{xx|6=RC<6geLTzx5X z%e`y=8T5~Rzxj6Q$+!P3UrYHPJ$;_<$7ElrvS}ZEzLkA=x!ovshrjyiiYGy*%b!(F zdaCGi^l8oWS>Fy6F@F8JUirt$-!?Dr*Fb-4ucjOS+v;_%`u@eNGIMjz%zT|sGVW61 z$uAgAuAgJa_V4nn<45Wxe^eb0t>wz*7swfEuSMhxFuYX;8?($>( zBa(JvHHIJAyA(FZy1hEYkh@m)o6hCkI)&N>=R|g|^LSw#SNdW5WA^SkCHKev{Lxwa zM6UetUgDfxnz-)9jXTR`+63EN+)!OMa|y4^w(T5+deU5VF%zu)1^;O8|9JoKeTg5z zN8{8chqOoU+)-`4J6hxIlFJEcy!k>69upU)vRD`0coJ6MS|{>j_2IRT-c~(6dMj?} z$?7g|L`JLj!Ya?H@%zTL>@OwGq>lY1Ait|)(6cJKbBud!8SIqUB2zg*{= z`(trK{Vn-pCqIZExM%h6^yRqM!5b&N4w)y--Ki^HVCc%O}Fp^@uCqb%djH#fyTT@w%A(Q9)>dg`NlVj&Oa2pmaUNz>y6 zww4u5UzPnn%3k&Q(tYJqQ%`^1zx?O7*FWT5$}~k>y*@R8<5%$NUSf{|SYEx~{?mU3 z`-7dak+yID)j$0KI`vZmbYV=Os|e%C8qhgI2TOJ=`7mwe!?#i%-6zYdA8A{zTjVjd z_>jAa?XQgm46?ixKJ{Cc_tp45Jj?Xld)tTet?%a_5|`86S6ip~eEY6G&6d3g#`Jd}GA#J~M#PVS`TSsRc%ddGOQEvFNX6|o0-`|=0xc*N2&+w3c z|A(_DKHA?p*Zp_;r~eG;Z$9rko?FQtZ}Rs3x3hP@{j7Li<`%GPx%B&`|DK-t=l7rC z!!@txAJ0$EwywGt7H@j`KLdZ+@Aa+r;&1=y-I(j|^WoXn7{xk27*&0>Jv;49BXx_AsC;ErJ?=_R%zWzR)U;gR`?cO};7wcwEu7l4GZE`sC zh;4#jB)|Qi-25YQ0@rQy zMU>xs3!A04@!{{;r@L>zTqfu$bzb(1V(2Uv`Byhq@Z7XzU-DzS+qJE+dYA4WzV+%y z-ZSmfnM-$=X_lyL_Y_^)pe!JM#^L0iqTQPql{Wp!{_y#qaK7jdQccpM$n6KeHwLkNv}S(`Vf}a_nR8ymj~cpYQwU z^I)IepMvW)ypPVa{wR0Za(S!t>bc=Q+dhh3d8s}fba!v|p(7>b1vLO5gH_&W_r6P5;Tp>t8h%>@quMz_7jVrBd%Hw-u_PcV@5@X}dp) zfAc-{ac{QA6_eAe<@vX+x#U~C-ACx8%6b*Qyu{LpHmf&2c`LkOLhHn1muCf37r!sB zcUzo4seF6%m(5pZU0m@k?5pqn_hCOP7hlhuTsbfJXLQZo0R+b0*()MtX=c);v+_$&HnA#GdoO*2Cp?wt<4|8;a8yE(>&Jr)C)n?V?Cpv& zK2yh5@%U(*o}I#!U&d*Zx;2-H9v5)23=P7+`Tlh#` zJflu&+eg`-S5vlMwy`yv^C4!&!uIy+n+N+-xIaxj_sH)}&DQiI`}u7u*VoAJ`LHKm zaPiiUv6px4@2cO}@b|D!#4&}gjBoQbI<~KAY%_0?mQbzp_`W6ob!z>w_kOdPHcioU0Pki!g{af!xxx$C0eGAWAWZwC1Ph{!C?%2HZzI%3EemCu&u+Daan@;Zw zwVBrPPnab9OuOa>+qQT18FTZFT|XE#>CTH^``b(PwY1OX-S*nOefy3XcO}w$y-(B~ z_^@cY;T`cz35n@Ey!q+}`UU?p9MXI!o%qO}_vJmM54AscUC1|?&vTk>K8xDjx3@TR z<-WO35Siw)>0Gx_Q4k`gzK(lHI$@Zhj9_uQl8IFFg0pn>Bl`r)AZ6{OGKF_}BWN zo$6)P9o??GuD><8>vZnZ$pw=#@}B%Q$=wzre5(Zun2MrIOrsB5M~zmq<#9IS%p`=NJimt#v>a}sK!~SFd5V{ zWna)ZMu|gL#BSA?HGj|U+8iI+a3J7xhK`+Vl=kzTpLd)|%_&Wf+r4K- zpsVN$8Q5t}jLvyn`ExLTjzJ^Am>N49ygV%MGr z_deaWRbOV)o=t9jy=p?@huD2K2JYb6{Bi9bM*YLLf6I4P9h`3^y>JF!JO6>`bTL=DZJy7B9^h5FSdG?B?!-ww~=F7h{%ja8I^=+T!{_E>GkBJ1y`T&|lQ9v4;mS^IK%b=7V6=j)x#eSSQD+%NDdD!f@| z^5JW7#3ryIOWyEvzVTbhrL`aJkJ=gkV15uU z{%TsCMCG~~@5IbiFN<#Y7c$R(yW;KQ>h#}E68DT)8s~U$Yy5Qe{n&o^AJ+%{L;E@O zCO*7xb;vi&dbalL(|_}hRiy1YHS_5eU4?niG*v3EMlQc4U#-18>*XH(-?~4F>|5dl zKg4$~*}un^|NBFe+_e7;KHGmg?)dGHuP6Bm#y{>L~?d$G)xl~r|^5I!iJ{`Iq| zYLc9t_xic^nxJKoWxWS>toQpUeo&vYVwtW?mf5exv6*IXv$I#+em(u#_j}&SB}K9B zMrXCmuIg;EW>XNJ9BO+1=KBM6Dp%F^Uon}zQf1z&Tfed^zDA!|o4RBBr$002-BlGd z>3-^BK2^s_L#5G6%Hsaw>u2NtGfcd6FZ_PMw3WA4T|Vvl>+bz?w`HC;*4Qr1kl$*4 z(~tg7|srqcpChb$lWxq4nO-pH-BBj`v2=bCK&hB~Ip>;K5s=0ALK>HQ(@l3gmt z9;Icw?hM-87xS~RSx&w{c4N@xr<=;E*88pgxc$`YmAT6`%{Onqe`?WxhWOe33!m@j ztuv_jeYC2*b#+q7}C*Nl(r0|1xpgJLA1Pk{XAZ6?*G$ z8ht)CYyF|svCc>DDR277Kew=A+I!KK3wP8`pS-x|X3Ea#XXLe78H1A=?DDqWsNjE4 z-}Fx^Ytt(`*-NRLcV!o6O3wTD=&5e*_k4rsC7X)yA0Rm59x<}nIC@n zHD?x&__V@T#(U3P>t7jjK3k^et@4b7Np5|-=iVG~S;cAdyzA{l0yRx-?d3Xh5$_Cq)0$f4siW#$&#OO|o`mcKAO25B%+Gw(g!?tA14Y zMSD(f!Dk;aQMFGF(|$yxrgU@MRGkr?_|e;c+V@BPEz3WYcmDCI$zF9oGK<~MH?IE8 zXv@I!WG#bv)OKe`{uvU`8jy}PCCW9IgI zs#ZC>rf!__v(P(l^YL#hdS-u^q`mCX0iMYb|CG1BjuW<4n4=5*W8Y>muyYY(?s?i4)A zwd1v9>iHSkm;XqAJRSFu=RTj%v&(+Yk9@OZFRwcic!JA4CGUuv&CbK}o0mN*;)`Z1 zGQ1U=y`Qfp_M^J@<@h5~YKtHD^Ld({T{oR?Rol0V^VGWBvP)}^qJ4`bVobu`YIfNhSBr5q2$d(>Tnc~UeY*bj z?)}SM_#fq#-+upB_j2sHU@DxTCVJwB{)6|edzjb6WZm-J`e)XZ3$@eVMlD-kxnfU1 z-rc7zd8w06^_TIT__k7^@W8rjo$E4+j@|?wWCkwApHFE0@Roo%JeHf9|@xFw0Fm{>qaDYML64aZTX(GXG|E^tJqwc*%@g zhhORcZG6POVXJS{@3+?rT)u6)&UP<)(wVf3d5=8)96ICSIL$We9WBpG*z)GtrTeM3 z{%niaw$9(Zb?dH4`<3$3*U#O#?r-eXYqPU2-En`*S-CE|UU-l2gV$`=>o@D0+;tA# zvHiohO(nBucPh<0C2F4$?siis*4wbh{QT1uio4{^KDs{d+mroUyCVFN+w7J1>ks*7 zE|~e=wDqI!yXpB^8{Vn?x#`f$`&wX`TbBbf1H&uU>i5U$`D%=JeE2ri{D`mHX@|=> zsh@I6wbvH;Dn2fEYE#+inN(wy8fpIUz(Ka1^E>RAen{#qeE3)RsMWgZOH7YW5tGVq zzkO6V^V;=qCkjR4le}B{o?5e}6jT*oTBxlTy7&9<`}fnP^}Q{099L+{RNEV^9@Zt9d`8ZDjVDjxxwE#LyeBGt z?x`-0_djS9<1Djr*DkBJFz2ZXJx%>ucNeqa00*Xz}#-$9qc&(#_o4yn%G z9dVaS;5AFP(y3R-B4zRB~1lazsXj*A9~MGqk3^q?t0G$)qC~cA1Skbo4@9*>9W1Kx+#;- zR0Q2!TDm!GLZ0EVjctjOI7JpNxm~;8{pa2It-Ehs)o*|PxBkGpsJ}~VLRVF2KgjPc ztrdH_<*%Q1N|*FihOHYXg>;-T;yA^wV`MmWz10uL<*%jYciO4e*j!y-6TU1n>|0ds z*IoB3y1dW&om^HuWn$0|k58-9kQL1m3po38F;-?bqqP%fNU;gd$eDmv@Ynk`H`Pa*KmAp8wvT@JHb?blY z))-ye@?IcYJpAL!%cf@!XPJ9jdvBTX?%7UBAr2=?!#lg2)!tb5@O;}ZVyEK%_&>wV z$;ae$xAx|k`tv;w+0^lKbzy#S?$bL5pDo$wcH*5`s)N=^gNo)mgZY5SySm5gDMM+&!Hp5$=S ziPmh_wUf#}$6r5L`+Vi0XA#L|u}h~2?VA()y8q*UhQpU1-*rDEY`5BOe&&MLhOdQg z-)`c4>@LzR<8!d>3g4?I3GZ&OS)MHumA?D)pH%(9-+K;fN9Heoy#9~<8I46+I0P>(F<`_a`nzU6GPU+%4#kj&U4-g*uk*z!8WT|Cw_E( z_|Nc=Z}!9ce77rZM@yy7@m6h{z2dc;{_btNc6x3|*>2unv^hrkVX^S;Hm*GhT-~<0 z_y05GX4bsBynWKvndMRO+yC8}y?yHBc(W??!}mpQRy;lsC-YHkmbz5k+U3#rqq`T+ zY$eOqyg)01yRK-nC*>-DZyI-4ltmQ1p zH`^NvuE*!i;p8&(*DRZAasEpAeBM8%EBqgN&i8it&(IR(Z|&w?DJSukf6|45j)Rdk z+f%s?G|p+9z*`un{o(k*-{*UzQg(e?xWxJK?`GDc+a{asxZqj!VN$}>H7*PF)?|WS1oBzyx*^QXP1x=sMeX98IzToxmZJQLH`hC=wHmkX}HhFoC>Xm;cXHD!B zE^m$R*p~YA-|^{3HpO};9kcCM3FEa^;oUGHRw=m6(q_)PjQvc1_#eLfa&pUG;WwW$ z_ox2Wu3cC5>EBtAi;mytJkN?bV|lLp$&&`Rf?dl+Uw)5N|9?dj3*s;{m4=V@>2a9>C@7)O@!X4#QJI{ti!c> zk`bH3`jq{1-{$10^%-kJkC|?a>2LbS_apj%)zM>Xm)wp&xzd;YX5H0QS>I=MA7y)c zFU0Lxq|J)&32*wdCb7$Kz0H{QtX(H|?*8-t*d9mu?!Ee-Vf{-v9J%)X{g>4zb@k@Gdh&N!(q=d3WwAFa6j~p5eRBO+-&^DPA^Th2hi7w( zS7lwlr*paHuix{3LY!Ytcb``};=L``QOxr(&+%%TgcC=Zp7R{3ZkxWm#`^L<-mG7h zA61Xdf1shCuQWZNHY!#%=VRHw6vNnK7k0T!de`f^ebw2hzuw%}lP`ktRs@G>q&Dfr(I6*V$PQsi?nLD>G$}{_P-+$-J zdk5C7*%XRItg%3_g=5F=Xc)9)Dh22Lr=1ES=xNr2#L1tqb zpQr9_b^RZU*Z(o!(+W0e@4nJg)wMHz zuHE+A`(IbT&Mmupcd~TR?%e#{@t-dJ%HOofuH?t($L9rpWXFGu@6OHplUVZM^y0fe zjB44UmtWA!i9h+C3VY^JjmV;JzJG7&@83Ur-#5;zS?{Lb{};ckz1nA% zy_I@@vD#DZoqQ7MJM6ZecoEO9it2qDWHtV(N)gQfG=SN;M-5slbv_CsjZ~Ka~ zwKj#)(w1hNs%Z(kpA?A{n9DH6Eq%0>$zG(Q?z{K2Z~qzm`<^mwJvX!L+jb5{_9s(0 zQq(0*C_I?;d47`|_mAjBS@V>x+R6Or%B_0#=)tmeuOCa_%`+@)WjQrD=G1C4<r*o!(QX__XXcJhkJ1g@jC? z$x}bW*Qchh_SF9Kops5R>z~bE%+7xwE$8~-vdgu7D*sOWJx4%q+h9gHu2GXxy*UYAM88V|8ZXNI)BNYZMRlM#@-doRNb~z zDaKhjCnA6D1l{HDXU0tVoD?a$F|O0Rr9S;U;~&$@YtlcSv0`sgypq;->{ETm^=q$1 zFELHJeYoWM!s~h0c)4~kw5b-)dTV1)#~1&{;$n^T!?)>&%k6}(UW+JM9cv!pF1}`x z*u(`>Rqm^Ft3{mXiLpQ9_bzMpo;i>2n!e84UqAPHQq=d5_*p+IYwyo0w>(eU38@x5q)_2K10Uk^QFnM+t%9No1Ct)$!GfZXOd6S=0!4ZnZ$6}i+hL8gY9Qrt3stz zF1@>d>SEX*`FnideZOsgtpC~3RU`;Dt;ztKUhQ;+9J3A@up<@|7})D8kP>HMGMmR+ zv-v@7ywipH2lMAi?z~>({GWkii~NTk^_%N2M^5&d{8mXVW2$ zwUUqbOTV<|t5IL^qd$7)!^=4V#yy>3igH!15S(8+#LLy6RcZqtldaos>58b>wfCoexD(}FbZOVLpA(&ppFByJ99h4` z|H$^MHNFqJ*V-SQ<-7f`th@J~9loVCx%TaE^wv-Fn5O#g;?}O%qX8$vluoTHPp><> z^^5NNqw;K-`vjYhme13>u(n3u&Sq_1t(VBA$tsQ8if;3HC?tGamw$Z!mh!{v+dlk= zs?9z+%SG&-$_|e;diGn^PBZVFI{U5lJXbcxEzC+!Qm0?rw(9Rvw{Ls4ea^b}XV#mm zXEN7$mEAjCubNx$e(%})Ly}*s*SGEynH_gk=j-0YFE^xK2W?2p(7UtX%nC0>#eLt( z*2gt3Exj7=e?_;nnmhZo^o;zRb;*$nc5a{GDYN3R)~UmaQ<^xGn|LFZn^&4=vxF|1 z5a=q($|@4*Knj-Et-BC$x*f24&!u?>b=N$c7n$U&xMNr6$umMi%I**NmONU% z>&L;oIz``yy4r`MzIUW=n#8fy*Wa&;F@2fMDoL^QX(?&ek}+$%6+=^1c)Aq7)kgo` z_GQbO-cNcpW${%*!Ae$FDm{m|ET^z%$+~Xo!;?Bck!Jc`Z1F} zWypQIC+i)2^_HZv*$3wx9)T`vE}iV3+qB@EOwavxc~QOmKbaqPAKLt5)vMP>3@aud z`S3a_`QDz%O9KCHxxMt1@Pvgm%X;itL%j-i%3WVkliBS2@!RZ&bCVC<`q!VoDsHa1 z-}QCVuCB?~bIDieW%9eM7iO+FPc^u2PtVtH`}8lY@9o%jZOiNCdYubV9P1wEZ}Aj+ zZ>oIo=S?%a2S(w^=@KTB{@#k)UpxKo(k=I{eNEYC{^|GD-(UCbeg9%#+25&uQnuGw zUbC@R?+ibF{pIr6r`HR=tkc2 znXVU|T%76*MV?DdmixMXyZ`=bnN->AN$*&mX*b_vqANw<{*;A6c#o z{M>kRi%`yvX%8*#Y>d^YG;XljrqaME+yCbKV_y9O_j&)R|F|E0UH93Cq#sSsgS+Fm zZq>{#<+^oS_3RzP6-O8zr9Fh8B&7I$J+n?<{@KcIRc`YjHWH%eso$(bC~3^|elFd3iNN zPBJMAyb|JeGBUURpXbf}VP9{ZT>I`+%ys{@`(?$tzr0U>Qkj4BxAys_eOk@Os^V6Q zF6*uP&YhEbMTtRDp)c~5=92BHp38pgWK6V}<5BmpUFG_vFoMTDUK4NsXOyu|_2bd= zZMnMZr+FNCBXIA<=sm05Lerrh#O(|_G7vI1PD-8#F) zZ>7q$qbg}!+{X&Fjvb9L2-vteU!aiBaY31ygyS?g(12^V9eBr#NyRq?;bE5&0SuJ8#<+I&zcGI2d8J|UrB3;hze(%P3P}==so!Q4}(?5JaWVOB1 zbmgr3UYFk9d-m`m_qMCDqV76J9xzy{G{)76ztvpm`tU!4K-{Zc{2eB;@vb|zY@L62 z`nM0;7V1pTS)H+wV_C)hb7{AhpE;n|{Pxz3wso^!r+(jSSM_H5=TzJ2+5309fBIZ) zcjCkRwko^HAG^Off2cj*`t`J}*K@~9d2dRMm)vtNRw+2qx_nE9sC@N_l>#^9BpDRe zbkD2G7pQUC^<(;PA3%Xe(mRpL$exx79%eTQ-G**R&U!DS1l{JF^X*;+>PAlufellvxZ ze{^l-x9q&R`8&sQ==o;U4N_1D|Jy=%*l-tupLbn$x3m86Z6gZH%iY%xmz{ym6! zlj(t1xBZ_+Pwib-;qLy?Y~iB#tT6A&;zf67zqP%4Ti{$$#h$YZ9`>D-;XUxU%#DvX z=gRl}^@shh{by*Hv~|+0`+fVD?iX0pr2%WEF+Qr_mbdreeg1!E`;-1~uK!Uv*^fT=bT8UJ8an&i&hjp+@oycbL)ABk9~?}X`H>-_;M__9j1?%j|n?%gKG0m%TE(7#Ho^WOHIkz>6C;JxzSPmFGL{Gxl;H zwlms);n~H~)3bGrzMb~HYWh1P(M+oL?EUHADwf>UnA%-=;pyVut2IaOxLdrMJ^9Gz zli9WV-#&T!{pGsp+1@vvHhJ~y#7(>ZbpPGk@z!#QAN7yg34Jtw%lzS+to*S}73~q1 z_PAZV^F!R{w??+4i|}#PKSvXPJ87|5-dE`TWO&Z_xjnP#e1U_H(P{Hn z-#%~BDq`H~Qsj44VGbW7%Qb_zN87{ZpY~qww)Ru@(%$qnOY5$E`LX72@c!K3*TqNd zQ|l#l@AsAV^PhYa`tj?p%x9loANdvEt+zE=Q~LZZp=`_g^5royPn^ZVv$p##2)}Ip z(qx)BciurgDKo*1Z*DyJSK9vYe8)ebkA16Gyqv|}WfEH)bU8Zp>%7@3oC{4?r%Ox~ z>~x;seuiCXmy%PH=K+~XnZBBr18cYc{u4~=6y#Kyl;Ya@#Q?z z-HNhHD%-E-=If+Jybn8YSa(ZP=_G@P>L+^|{`4fTt>t~l-|=m`J?FEJ+xj2;Ua6Cx zr@m*)WfPOmb@`7fZ|I(rInuaPV#d2X=9G<=tGCrln{cmh`erX2r~NvY>$7inP}{YL zDR(zab?g)9I_k6L%nA`h))@?$=fb~Do;Lky{^{~x`*%MPj5?+rdwt#MU+*vPZ2fTl zv9jw2=RrtCvRVtwdnhtxW4zFes9^Fd+XZE#Go>- z!0WC*|NgqGdhKss&Ev=Sg|p+BejI*$H_cu^uk%s&#s%hWd*mzA^KEYZk&E$EU6C2M zwA8capYBbboU_VL1Y`3dXE}PEdj&tsQ5W}Fj@Q@xX#O_o!@I^u)@$v5Ye!u%S!)-q zVd^g!d6;v<<&<31H5(X%CT96$CisrY=vYTm@u%=%o_851Av z*zIv7eByEI+b30`V`s2VGxzaWaa@w2c~-Wk>+Q7v47%mlzTMxId+WNfXSLb-rHj`; z`JJC#d9%!lJ6so+^zn!!_2 z-Y(vG(Ras{psszZ?q6T|Y5$v@<>~8dqWc3rJX@Q(=7m`t*Y?Zr=KF3IyLiL%Tf?-d)L<6{k$*A#lP)RTl`47<6Fz+E$h|a3QcnOS@TlavFFLO z0t4QX(3+E)djA=$Pi~ukcFV8tb`-;-G{Z$=$IZO>d&S0`R~)?RRZ zu8n-{!(*3feAh(2xaIr%*8aJrx}T5c<}Bh|Dk-$5(A?sj*R=Q-4#ApC0}3eYEmV?T7A%bMF+NtXvoSUpzXsjL$BiTu*QrN>uU#vA+2%D9`HdTMf~ z42LJrawGk~S#MYP-z~d&a{t^#Yrm`fOSryg{mt6z)BlKm!hQp`#-axF-DrgPcsKOU zt*QT@w(=wY!@1XHFZ{{7HtODuSK9*a+MQTF>(V>zshzjkGwQX|3JQ1IuAcRA+wPCb z$Le@D&-VP_w*H~_i_!|#t9v%TG`E_6lgY4ht@&Bi6$MWjjJB1%w|34I-TB+2#`}t$ zV2#R^8sBBHFRf~?h))x{pL;zyQ)lhsd7mUyW`#y3sRpX1{bz_+{PfAA9rGFgU5MJ= zxlg*r^_tJLI*aDR;kwzmkL(#-xuUZS|IV^ZnRU;4^|Wq-=?RC=7x<*`%%8UCrCa`z z*!!!V?*Dgd?w9Rnr%j&RQ~mP${eRqH*PbgreBV;1cgaplcy&ej(eH99+deK^xb9`S zlO|1dZkB*Avt}7(-!uI1Z)5tZ zSG&@t>uwS9TQ%v}Bct~+atT7K1@{I_&0Q|B?qi)|Y-EkihhtlpSCk*IGoKx@W&6~` z%a1pAy=*=te|GxWSrWmv2WnCtIO4)|+L^&OSLNmD$!lWjeyHaERgd?ZX5{NzTyHe}uJ-nyyX%&( zeEHod?nZQsZ>DL>reo{AO=snqFv%#e%UCn`dr{XT8F=YC~UYP@ODt0N)nE0^rrU8h-ly=;H2 zx!mcvuKx_zPsjU~+5YSL`Jds~{w*Qj?W{g5Yk#=bIvcnB+zdU*5_-PnzSP$B9Z~Dszs9=9eJJZ)STXx;(2IHE)2m*Wi(M4w zv^tS~TT_vFOE&+Bwck(5toZ$UX6}CmM*E|-cKNyP`lUDjGem9uVeRkD-w!7(@ z?`KT1&}X*tkepqRsN5sNWZ>}i)v`5S<=FB(wnInx=hSD`w@sffuvL3TiGJ&(EIGqz z=Oj$}J62Yw=b2QBxuq68OWhT(Idgta;Evj({=qTZ`|Z>B*41~#XJw^Cr}Lf*EMBuHG+aSnZuW1Nnxd=w#Mezseb_Cn>m4@f z+tIJeHL{B~ZJ6~bZD{o&i$+e@pbYJc8(HS>G?Kfb+_Hg7!ikJ59XcGS0$ zyQ$@3*txl@a?|UNedoA8cgOqsPd9!p|0mks@pb;q*$>|}?UMN}^}X@^o5edW$@y=x zTP!=-bjv=MbpiZlx|iI3PxaLNtRHykkMf7Wcw5u8d-tLjeR5Mt*%p>|?}7O5vprYYe>{IAEB&qV!}Ol#(@w=!A2Z+f?C4gB z{TEGMzBBF zjEZ-=aI}#1NZ@X3g^D~SRc^!Ji95co4L6?Z_v>@j@9F;;Zr4l4-%|_on)~hL%lFYw z_dl%8D7Gj4$Vb-V2mT#$;y-FZ3mRJAH~NO;4aXm zshsrbd^s1}{Z{Et-UDki?QDK*kNi-7@IQmZOR-(L(>fom^*Zc(^~>uUYV)3HsuxY- zP>mD}Q4Bk5w5{Snh;v-1?3_ul>u$&Y3ckMf(!OcC%RRU6>fT=~^e1=wv|AJYl>X3M znf0rsPW{8WZqt8TYif4ij(Bu=YLgE8iN4esI>Cq3B2NEYtG$B#q*h^n@Wvml#((_V z53FChWqV+T=9>7duy=On^Uj)2m9h3X7qjHd@=MAw2|pgL&e$)pm7n)V+rgYKO_OI! zJh-eE_bOnX|8$kSrPE%Y`K+p{=zHO3yydgKOJo{qYcr2O^sUj4Ke)Owd-dzuUtQi| zyVlx^et)^>WqRp_u1`1OqWtrYtUA>TKGtks^bf`l+DG;Yf1KWUJM_bqT|9QY7vGt@ z-WwL{tCw``9go?y4X1O3PCb+O)17ceR>}GX@7mMyEA#S%F5QcKV$o-!cl-k5X*nhIgQK9uH?KmS+UG}t?V=lJ*Dlt$|p`sn{?}$rTL;`B2Byf z9GCLdKbOA_d2)9ieBwlpN-FAcsu7dJgn>Zarz^hf9rgn8plT+I@bet zZD^3rcCYO=d3RdI>x^-$-s?$LJlET!>#jCAms&ZvG)S>opIjMpvS`QcU+clU&}lnFD;XDZCD;9;NP zS9J3a=VP(IGylXtczu`aldk`faAwcVo(r~SADHgV6MQ#z>%1gu*>&<~)C+rsEARPb z9Q_?yu`QnCmHD-8zw|bhWMAug9eer9>t&bUKdSbwUTw7T=h8JtUKVdTYtk>{rWCE% zT6O$redp(w@$9DkG8I{mQo5Z|uA~d!+@$+-jo$h~smc}R+DG^3tUPF;*l{*{+iz2m zzb4mr?b`b5XWS9p$ESDg-CKJ3{q5!R^SY16*~c@)RT%s9`|P;-ZjbZoql=Hq@QEDS z7c%Ljb^0nLmoxQTZ(eRZq1qc;8OG4R)M|S5`}$9pf4x)9){9;H*M8Uif(3Xww^55ua}7wotMs(! z!hVTgPJ8x0-rl(<|3N?ZD?8N>_m)1)wfDTZ<>&hK!bilW3-3Lxa7W3lFXKpb!?8~) zmWiL_exAF1I<@}f`s?1=)zQIT<$D+J-u`U=x@&vCnA8~R_tsecuw{Mko`}8{M zl75MAk!+kqNw&6ky~>H{*L00$cZN=TIPr|I!F{>o9qZ-&f24=ZkNNOy>dM%Nlgnb` zqFf7gdyebpy=$ChX1B_d{bgMBqx~{JwjZ7^yNCDT*IY4wj*4D=6tl6A6NgF@niFEU*6RbGw*%PZCyWmPfYD1-_|_~%y@pbzYEQ8ozGyC{IIt8 zpm_N_*}luat$%aBouqqqr{=e6UO(ynT~Fq&FXs9<=lzKjJnl<=bU(V@Q=|Ry_2GR& z6LYq%RgT$JadheOcKrvFj@@xqnOFVFUpeZOW3OXtX#8`QlBYJzzoUOtAOELx@ym=F z-ABDDS<3|TYgamEZTru#uPO6^%JG7?zmII6FA>}BHQS8e)o?bi45 z*TtT%Ew}IX``15n`;YDGPTkcB`8#iq;$wccib=J)TWa$iepIF|o1s&3UpOXnyYcOq z%A1^-y0eY9?(n_&+gtqHnez4Jd$&fv*6vPI4a>M?W8I5un)h5@&fT;mx9_fNdAK=eX0FbsIqpK>?fr|^&b1D? zb?!ey?>h@Sd6F1c!FoO!PM=GC23>0GDMWz@Gq zl_!R2dCxrUb2Y*r`XBl2|6u&Et?%OD5C0jWKE%$y`L3j1_~nIHAF{+bb2hqLN!vOy ztrDA@_Wj%=i3M8A_S!#u-uWl}aePBnepk->Eie9fu8XeEzBxDO%E|0Zmh;b!a=Qz= zo!=xDdhUTY&*ZbInf9`#)%$NAKDf`QT3dR))Q`nJ+p=YiSWC7|`xQ}h_|vp>fko{T z|0TH;ZdZ9UH8(UoxK{u5`>6ZY->;N>?`pesd+V0_r;hAr`?6lXM)U7nJK2n?w#NKN z`XV=?GH>O6ZJl!QrtTbhl_JaZX`vGr^s8<&fA1yRSk7Q!e=GYTf7hSN$G&mz&L6JY z->H-L$He5Uu- zN28X6eRvghuQzkWo#Pp6HmsR+Xx%|(V}YH?4W1V7vd`L?)?|H@Z?Mz6nelS%i+h3} zrhiK@pDuN8^TSx@(3Ugb`K8^i3&c2!u4)anUiMfwI@B+8t$ydpfBye?cinxz$#~hN zr4?^ee=h&$`DOc=Cw0buSU;NgZT0+;Yj*mG+2SK7m)UokTFbf}l4XlbFl?XsFy~(7 zs)-CMIU?>0wp+06;_tlAQrc_(NOXEfjr@}O%(E-*n%@0w62{H2Gcw7*^orjKITh!^ z-&ewS@owhtxnBIZU$$cNVR?yva+zl57kNMU&meT?e6mS?u6F#IjW_SauzR{JGT6T} zaLchHPd-_!>1X*Jl{F<<1jn=2%mYTWg#aT7?E?VT5 zCLgI|_@n=kZ~8~cKf!A&f*v0{nfvhLl9RcmtK4g@Z{Kuo)1C8zo+&rkl6sUTwLic)wS)z3`4#Lh1#v)JgvQ_mCY zq#xKH&n-V5XR`Cd`Hpuc+`o@sa$7BOPp&B==#tA6*Wy%zY3}zvnoO+w>eY6q&h@zM z_2cp#*Xw!q@XYF)ExKmc)^Cf`mqym^FTeSC+M|*VPmLgnMQrI>94dN@k|*wMuGtuL z^TocRs5quBQ7ze|N%%+8(j{{OK2LWchp2t|foa zZ!^{QwGZ36DciR5>D|L7g~v5_>T7en6KSb!a$-AtG4nse=4-p{5A4;GK5KBhVzcw4 zU%ul0UEaavnJn9VKCX^!WnI_8&T}YGW|GXQ6qb$o?fWy=i~QJ;X(xSYeU08drF|b6 zFC~9;j<{ns*>rQ};=r~?*1=XM_j$|>a7lmIEPI4O%a{M#s*h}|m;Q(kpLN)~-F?>H7!cz3*m$+B)kIKJp$^Q}kkz3{ZqTlQFf7olYiUfMV&XtBOuVPar zaa|2)4G!Mxm)m|^{_yPmqvZ{HCfvMHvV1@8-HdmvEl$gORBi}>s#t33ZPPIm{QP#_^mUP$d68#(*cUETB)GbA>BCl24%u3RHesxw_#mZ zu}Gh>Cil|g>bq*SLh*LT5APRPIyrXN=FQovWu8yHH>G;cOxJKdGn>)UMnvM|@tarn zo%;UylKCI0+l%ULGe1{*XuPlgy#33Uoj0=XZ|>XrD4ywuy5sdft`F~){?Y%vbo>4f zXC`NcU+7@be16mP;eD-yP5WnV5wlkfnEowjXZM3;tsfro#vl5AtF$?8X3F(FO4}zs ztDNUjoiXj1VXjJCGHXIaiM1QU+TQ#}xzF4F3H-6zamiFx{&3Z_jmcSwo9*QGUE8!j zRmi05n%N|sGva;a0$rBpMNj+xcB{A?{v-TBy;$b2)P23FEB@{+y_@*QdPUZUm+x~G zJJL>xcRdse^OUas&#;QSon=nntk?HfEdO)s*Y|(BzLyy+`g_Bo?DFMb|0b?}6o2Ek zcI7|1B$d zt{=?@gm2V1ZMg90;+GYFGD~^)t=P8l^@rIV%Fz)Q=SW`i@O|C!FE7Zr$zkpL-v11O zHo=ci>(-iVyQuWRulcb&UuMv86QjLfZ|}V~zbL&gQK45rx?_rBZ#To)y%i#E?-pg3 z>z~SuyZ^YlH0!;$$=pfaoBll7694U0)Sqd)fAP%Uyl$m@m&wUon|Rj!tvWW5`rVT~ za#i-8j`318I+R!Q!Dh3{CyT$=9(b@!(A+2TcaAAPcg6HWi8ax@$D97BuekdvO0TD6 z?)C{=KKhGK`d(KU-pYDrc8Js3Lrrh{D(uB8-XGmBV3YZv*DY{ybcNmJN7*m7ebZ)H z9KfG7Jz(a&WJNc=k0(2hc!he+eS2d4>Q$Nl88|(2>;G=!wcokkyF6#ttSg1JVf%lG zT#m0d{eAzWT=udW>0IlF`ki}(X8k?Gzt(^C8PjNsKQq_*em(Q~Pjlfjdwa@m~{>dF{gT);~tGQul5>JN0s`bIHl)C(?=)eQ(cJQ8VZH z_+_oAX3@UX{a^nj{C%2!?>~dt3i2TiM_aEu^eW?u(*m0q) zfBDwcHMf|~u05qtIx*2|$z$cx_k(|5K#-2cq9^LO-%c`BFwn5*~vFdTmtsj=^^!v%r<$A5)T-ZT~a%KZEGL**{yp z<$ewO&(QdzZT`!qkc_kGEAKQhm7!`=_uI#>T^$a#6owy!OBUCq<*h_LF# z1&>@D6)ij#>o}Y|=6LeM_BWqb{m4HSC%NBFuj%pOdoou|bNvo`#i}fMa-XwXWxHG@ z@6#=Bx2(xN&cpe$edZ7EN9rA_KlWBXS}%0-b+d}6cfjV~G8-!L9sZr@dNR|c?a|wspEW;!+*0{zAlYX5`1;aL z_0Go=ofoeR3%<~NCA|9&_m8cQ;-!8x2VAmASMBS5#PPQG{vW0P3^&ZDnVV&(-tgEp zE4=HM;Z(K=(QuU%p2oP7_13p;-0_u33x{q;W18x=*z41ep!%}yU#~q__sTlI{@Cev zmyY*G)v#WXe{j5gkEq^e-nOL=qkrU;@Wx5>9y+pZvv68k$D8}VCu}^?*j98qB1LfH z!}Be>zPIjSe{|cLU&1~A@qD4!^DFOE%rseQaJXDaCFA2Qn;x4TkNZqRG9@PL*{E{8 zZ{5z-5p}2Hxt?6s*H^l-Cx2z;rmX9;O3&Y1w`|hSThE>*&sI3{$*vF->LMU(K*s0nS5=l z=gyG+(CVN2i~cjrDSNwi%dW|rnqPd4oA-CWX2$QAwYe8++}+B^R-n?QJ$LSyr1@rH@VC~I+=;4JHcK3?`4`nBz3|b3k8*n-?Qa1c<tvlyc^-E#a)rGQ{lDq ze(kE7jO!tZemC_I94{N z;(phj!bkBf_a(mih6m339du#cnUAi|@5FT*@DzGORZ({H?R* z`sXJhT^3iTCNnyonr68^aqrxd)5S zdTUu@e|`EN{^0w2+&`@Q&v3k+zb5&izi7oQ-yqq%XAj@Jdz-U(+hpIy8K!%@7?s>w zrwF_~ojft7_xzjtD_?(;Q{MlPzej2B$9*eHm&M+GT_)v!bM2ei{*%vc;m*125m|Fs z*@I`k{j38y632aid;M4)@*(`deX&=+bZ=+=niFwzt?nH2C0RK2PnkZu(UlQW51{?~fJ6S;{T{$Q93DvEFCPl^VyCc@o#8By!%H zuI}#M&1^R_vyA2Yi`YEYC6oL=-8r@4gsk()PwQ&!AGIIp_*4JesN(R^Z5!9Nt$cVV zYg(YZX`b%8+&#Nz*j$;WaZ~o#nH8*&D$=o68?AIrqod6KGw}6R<}RE4w(j!k%+ft; zU$6gTKRxf?#>W>Q{b$(z!|MJa?|k0H*@AQPo)z>cU7n;Vc{OB$_wJ4fq9>(0ZJ1-H z8i^YnmfrMMXTj7jkLG)4uebm7pF#i3lq-|6v+LvMKM@IZRch+eSOA`I+cW#I`k{L9 z8rP5NZR@|pi)Y;Oo4VL?SFU*Uhd;Zf?mc@$;i&b`@@Y?IZ17s07jV4JTx)9gw@n*% z%>P#N@!Z1nHLs;)zfD{c`zmaEl-gp`ojc9BVYrmqRVzo=gQm*L4z z>qGSqU5~f^m%j4;i+gIjcYfG%v0~c2lR2qI*#?fwnB`V26N~NK=ayZdqOG7f=V95T zBHzOKf_7pR=a23ewUK|cSNZ5Kxrxgwa+g(XxqYASY`M;{j5EP`bMup!HR@vAKh5*| zc(m$h1^>~%LQm_oex%l@Kgt*S&v2-;eq~gg=d$B#d`fovs;Rr~u6*;oQpw!4BrMg) zWE1b&Pjk0yofBUEH}huRFO}J17xOp$+n1mB-lb&M`$PU>KMo(;W&3Xb<8JSM?u=W< zznz^nIdPG6_GYDaL3h)OJ4!1qS3XQ~x*?gj^Z6gq-@5-94$0R)oIUYT{nokSzxAK~ zJAdTnw5#(wb(6%6l$UukkFK5F{d3E#pRe{mTyxV_-Q<_r{H0Ik zKl%6XyZ7aN6rBjWUi^pn$Lu5fOh2+MTvBn)TUPvt;BD#sKV1JaY{=Ki%emmaVPdJt zdeJY*T09%H)_I(eWZFCFU-{OJJ7!s&Sud0Gs(H155vx|O+tjBYmi&90|9b7Qb?{1l z?ho~c?>qO1en`51uqtok(pyTgf!y1S^)Xj6CfGoy{44ytSjKEPqA z_WWQy-|Cf#gBchJh_iz%R?hlx=gl(@7(eES>moD$v{ipXI^WYL|2A= z4-K`A>-@RDdcE(wedV(Dl}C@~MtzE^n`>`;vxfUacF2#^57`I*v3@w;{Z8kdiDIhE zudQ!*Ypxd@IhEd_@@Z#hYQU+55gpCE&0MVZkL3^Ri~R|_{8gX-X2rR~jvv}KCfur9 zo_0%e=iZ9@MW+&Wm~%}ObG!Zdu$on0mi*1&qaS~~{#N*-a1C!^jcc)4Yuff%?YI72 zRhh0WwC0efk>>=9gxh@&4m_Xr@%-)NZ;cy2tnz-yZ7+ZGhjQrUMVqhX-FWr!#=X0% z?<%?Ga_Fr)<#bkMsg+LF@=o6T$f(sVcW>8!_|KrNeLL^gW-I?Y!F|)>*FP`Met+Mk z>iQA?7CF6->OD1_t19x3?viU>`oqiFc5B<>hcDMZo#nQD+THIy+c)&?3t7&f=FBrK z@zdphYIWzg#aUf9nd|PeYvYIC(Y`9*txb+jeKPe)=h-uQ>*j@C_*B+;qDahL+An+h zkHZrGzgT1QXaGULmMCp~v#ymQt*vEk)R+XxlHF<%4oLw#&59E&X zWC!ik@7|}n??d{be#zIrlG?A7tQGt8>tg{4(D{~>1vM8PCoN_yJ2^4SDD_G zxZ><1@|;`OOZ?GqK78wY_u2mpow=qQU%mYWk1spU(e+4r{g!CqytP}WGA+q5=Y1tT zRd?bu`OVu_KGGF`wCYFEzwdE-FPG|bE!%K*eKvbg=5p`Wo;a7Iw@>svOjSS08d|eX zu=4!cKczo5CVt3%`1IQD>e}TmKc6*;s_nV8_R#HHx>wF_GMnY5ppxZTrqVxETINBx zYxI&iTd(_vs=r)sl)rf2w=|Kp+AH2Jzk6ywUwHauU%fTwoSuIxzmi`g{>WO^IpV|h zjvDtzzKNx^QJ;_4iHhG!S+ik@rc9Y`5to*`h@-d96H%4PyG83xf6KmTa=%6U!R^f4 zb4xb*|7Q@sx#h>{ndx)R7yjOQyCbE@*{Ns2!kZHhuAg5g^W)a{WAQv&b|ihg-)X1t zBiZ-kxsBP6e4`IpO+ET-)wjfTbw>`aiMXS~^Us@+&CY4Y-+Ni-+iL0`@BAbDuwOjp z{*SiD{+CRpZ>#q`s_u9vcI$HJG~Mf5tDnUBnsCeR^qk`%A>NrXVPbIVlDM<(Pj-gi zO}VuF{6gQq>(2iDbNC$CXllCo_(t}@I+cpkzpD@0^ZqGJQEU&}_AzgM*2HL=wOe9hYujG2 zeNOIjaPn4rQgLPV@e`-6?+w1XvcB{0x4!+Q`yOVNzWjCi>;8Kpfv%p8uBh8AsXqod z<;T*8`P@H_AGp;w_p!OakHVVWyJwzTsM|gvablWh-9_oeM>jqWyCadDAN8b-wBSKqGi51V`>N^D~R-_q5Ya{7CB@7Oyr`R1nVFFP}rFmlL6dxWzu&8xd8rPjE< zM*XVpAMZ!3AGS>u`8M4>Vyft)ZPT=8JZ)VX&MIYZC}5DI8C2vR_@Mm0WU0Pzg;8o_khrpQ>TWFkyH0u|MHC`vvS%`K3Ke*Jf88Z~yY2L2yr<%+;xy+GF-(k+UNp_;kx!Y6BE*$jJdRq4Q?6*1P|GsYj9KLf`%e$uCzq{|A z-+k+OXP5s`|1fRJuajFZDzui?+VN_AGB}q!>5{}fiv=8hS6m}y5*dCUnKLhY_Kv?Z z_sQJZHh;g%PQe?0TNgE&R$pzscRu}Q$}~fR$R$0l5<(u59S)odNqg8IZ9c!ICamka z`^W8_k1p_3`L11f$;Q88!|7bT*{jaPSo!`wR<><>zQv^H2|ssQKK**XPio1_hqiIo z!**@@=)3zzlJ>;f$J#4Y_K0hzUEU+5<#r*);kET@U&SB^kC3LOIr>(8w?Dg9zOJ^P z{;)jXa_&OW+f(;zXUzI}`$N3`+I4kI6>^`Ce*333*>nEyu=|1kZgi+EE!lRmdc(0i z)(umfpNXo7-JEvvTygZrKkgr<+%u@>bbr(<`X{9G*|xS_Eowi5CGRBM-NREnr)2&7 z1s~@vxxb}jlb!Ol`iG(a8FFk4<7VDIl`rr!!m51!vplngJSCf)-wuZ94O7?t)~i^5 zXqH|tf8*ELf30pmnqqZsb>WJa=GNhX)5>uw_gnuN>R;EppS_y>C0F#*_0})%h3XGY{xR|KUj3MldfOk*&fYKL zy2R0JZL#T|xX`C}%08;j`_G_yDkpaKgbfw}Wetmk56fvZ2VeT99QlL)(b6yfgyT7G zomcNR?TyRz(p3*DpPBNX!DHghMfp0)od5NLwWbL^^Z2UK2ey^_#~CJBh2Dl z0sqpPvkQLQ(iix5zRuX!{OGlRqV2xda@XEIYA$E7_v)6_N6zj(F(K>Iql(27y`9eO zIrQeXr_#=Tu|Iy>CqLYO@V-FQ>s{f^YuDIRmS)ZR&(K}E^XprU){Xo9E%noiF^x}#vU)uR}9lSn9?)VLZP|&AWP`!vvJ4$cWv6bYwz}LJNHdEoXvA-mF?k{ zbZrTB9Vcx=W_2!qhwSItdsF}ER_uT1JL5+u-~Pp~r1JKvzFYX=bI5$Z4f`gEZHxGD zA?Cz`<9q%yoDM2yd%+p?@N4tZisc97H;4EAQ>d66dU@gG6svXHYCJ2K-d>n%^g3md zO|aX<6Q4wb^-rDMotVUTEB|YJclPJ_j>)^~`afLx?bP)lU+{(ehq?bQT(gpyr}~jc zPNMzb>Z)l~X1hz%CZEasrgBzCNP8pa#JTs)>T++0K9_3mE#nZOx6Mr!3{*XP=d?)X&Zc#d9n2~Xww_G; zY<>hjKD?^pKZE0@%X@OyzOFOe`SEy%PU%^`=Mg*R>Tbz+vXA3GLt45fPsYVt9nV79 z3vWpDtXRA%regIY{}wxi3cmjgfmij^eq=se8+7bn=JE-1!+xGNo2|2C{(Xb3jjAfo zpYLE_u_7pD&-tzYJY)C!2JH`fdtqPr`IFD{=iZ8+T<3Lu<+t8r8xzx4+80nnmp|mhdOgl&(~Gc?6fbf|04Ny>8{^))AQ0lugg!)PVI1dGih_> zLa!AX_YEI3?hxWI{W$-?eBrns%OADgTl@HL**9M9e}dWX#T-{;&Fa3@Yjr$g()ClF z>i3p03f=N!42zw;L*aVrgW7DDALoC||G4dP`A=-|qrYy4WD5;MCF^ZcCn?2#9E$!{G1r(<|5hP@K+>_zzac5AD<1ceUi?pYU|qyHDqL>gDGt zO#f~43Z+Dsk%el7IR0zx3^$ zUpGJfV!T#O){m7B`vrb1_S(Pe$7HjKAD%rwY`(q9Z2QBtm2cOs|C%y0_;hr4Ia8&e z`^h|ScGXXP;ml|DsnvLVUnn(A{ zLg((etmU~g`eZ0e&hO(EdiDG6_e{V4?%v&5wK>`PJ0 zy%`F;!~=>JHf=M$&MvEcu*UnN`5`-rzw7o`F7y1zbnDc!-%eL_zw!#bKC<=+_eQgM zy5ev0&8N$p>DRlvV$H)Df3A8TjFbN`zqLMlz0C3Y^;u?C-bP&+l3M;C-;Z12OZaU!c__|ZtRRvZ*!3WxhwHt@!p!WF>(B4V z?VclD!}lS$;zh-dmqPWq-0Qa|zr8M>@;iL`)4$8#%uAp6{LRmr`}%eEU5hT)u~bMq zeoWoprCYLd;Z>b?dGB-Ye$`&Kee1L*20JSjWcmdDoPA?M2*c#v>rG#n^4>iXHf^%M zb#9YN`hks};Te$Z1KCX{`b02Z_^eX zJf#V@Jik?hS57P!m@Gcgb$98#?5>aO9VYg@_FQ!a6|WD@`riJoR5$Ii=l02mf2m#( z|8(QUx9ib8(=Qx&GBc&id(y>ku7aOJSFO7fzWnZc)9;z}d+#nSvo5v|{d}=b>p#P! zO*_A=Z>mxKaQYGZq1)?uGo;S8_u5EJzqDztd5GtwQ}On1xkZAbH)QnqoeJiWW$=+> zT>IsR_Jb!s3RnIR{;lQy;oP##x?hCi?$~^H3-Vd7dT_#-cSk0r=tcaxelBdk$X1RT z>veZB=UM${XqkWD7JIjiRq(NENw;ok=BCWj-*!-ETc-@ah;l|67egf1zB1w9?0NzI zZX4FewNdh&W>IeK{6{<$n%;+fR6RB^y*6W8q5Q_ADwt{>U>sw!>f(%7gQ8PlTFwy(^* zc+@i0yI^AEnb~taq+^-_on=H6P(hqj;?*GYh^pW(8r|R$49+_2oDSGmM1}2@wf0xHS zTjf2i>~q}qz5D*%OL6@@Jyz+jZuZ~1e|qgZW|be#t$XkAA-}tHw#L@TMys^BcDugK zYP~eYaEH^THJ7zDQwyi@pL>uU#^3gm|7gm`>EC)5N!Rk0c-tzeYVs(YeRfjwaAJ3E zr^79?mOQ_AcD{cXJlfg+NdKszc*N zZ^Ld$OuNPBf0O@luaLd;;fHMdn2!{_e7kCC|FoZGrxkja-0j_Ct#PdQyiZW?t>9Ph z|1&Ik{n>N(H|cE~UrhRQb)WC8yGJ&C{N842FLLik+T|lvzSlc-77N|Y_z>^%eS+dK z$%njq-IXV?2gcQZ_3aCK>BnY;_mrM~>+j~RFnv~upQ*jsyGOA{XGu5O zHuM*@S_^09KRolp^r8O`|93Ww53_5X?uT62GM_U`%SGH;IowDn}i;Y(HSuc zQw_JbG4>V5wrzdhH4ci~qao zovulKm(<;J`&UI}T7@ltkZJOpb^j)D=HNn;XRrOYpXQ12-gjn2?3tQzrWp_mvwpjw7=qqzJA(s|7`xQ{(HO1ySBt8hg~Vz&6{-Scktz|wQr|x zz2s@SGI6TV0+nU%z6a7K9X|3aZTb4FS-N|#{SEesh=J*p@-6=${z!D)R@v_p6Ia>3cFfXO`n+xLk#KQ=s6Eq;m>xZHDK9!r$T+87 zFQegh+o-Lu&C>D?B-NOBIkEZF}t!JMeZ+&8%87^-AUFft(n^?o1zH?D&hP##*et+}#=Jk8(s~`LB`a8cPKcy*p<(Bs%S<@!ONQCG(c~`6p6AxfBo1#@z#OGHl z*R}ff*(+g}?wb9Sb58y6`6283qo+4#7bg}!?zV1Tdndy%`_hi>I}|kc=9>6ERcWvI z`?13;LiVQRianeU=Qmy7r|heLD5|f0W%fSV-v-&d*0JW{V)1F73);{0_?+1-_AK@1 zan+wEnPrx3{V0B*%Kz=ghj&$e%s#xW>didK{hM}d{9VIYbiL?q#1?~Hd3RDvfWUnOy$+{^OnRWQ>w!i3JcRT>0*s=x!U4Kxyru#E-l@@z0Y-$_T|iHI^Gk^4I=}ZT0R|~bG`LP zc+7|IU3&x{>vP=ukhv_ZihyKa7{gdTn z^mv>9HG21d2351xPmilRQ&Jol4ry{(DlqxhY+Y63czsXxf^C!YAHQ`!GEcny=%Ukp zAD6Wj{++G1lKq?Q)gu!tuhm6)Da{PY?_kYcAUIWRlS#dB*6I(N|Ec|F5IQ$AlW*mc z`+hZ{k6!Op$+FM5@L*na;L634xsCIjjaJp{3~F?m!|b;FX3_ogv#x!8{hxvD(N!<) z)z7k@-1`0Ry>-N|XZbC5LOVXVH9lm&H2a_6UDh{CF0Z-vD)p1p#i?um75u)P>H2WS zyW?l)%$okFc7JQBKKHuGj~P@inR=hjR^84PaQWP2%~tVKKR@pcnNz?t=|+HW*bmJ| zCLhZW)pKVH-h3rhQ(k6xV%x8^m+!`CyfF9q&tS6im`m?zCeGI}*N>T2>{F|e*uCRF z!vvo=#hraYb+7JtE{|2+Y5zg#l>M#JciEn?{!_nSKYLgGb9LDkL>$%{#dTgzGb%Xv8;h%EZ_Z@PwQ(o1-bQQXDkhyrj*>3kUi&G|?@wa1* z>j(MfI@=oY{^*b9?IpIcrr9&!h5h@-U3u;9m)9p2mESqoWYW9Q^TEag&ofQ85vjYns@ESyOh9y4JzVGFt_Eb!@_KkPP+yMQLDMxKnIo5sq za#ef1_v;^9wuah@%>EbskKg~Wzg~a(yIJCgFE9Dy^U=R2?P~g->5M6}-7+byP@I1zVveD-pHEOS%f#&yEY|n^6TS1J z-{<4~){^yX{9He7<+sQ2UEQ+ip4fi9{HJGkZ0q{^#N+YSS)NLFpG+^}n0lJAYp(pR zt5@frJGuSGl0EOAoVs4R_xh=C+aLa{T>Mg>`A7Cc_k%0<_}o%kyeu>L<%H!f?pT*_F&ug!)?0Wew@W{DK zo7eQ5i7@s!W2wyEXBe;iNB%?J&PV+6e}X@Jz236N@!@|4!M)QO*Z8h}^?g@~U#Z3O z9kbu^T>ErSFY>gJ`gA4M?&KV4<5%&U-FwqE?zz54zP@g|Kt+*cz)vIhZC6|iRr2zZ zIG;J{EcIEnR(tiaa;;-_Q5W}KnYX`n()Ew;b9H{bl+~LyJ#Kq-UHz<=>sQ{dk^Hds zeS6*)*KJpFOn&e6s*THkb2jPIskB`)?|H_bYfG8HmMJrZM|@7QglO>5sPx0}?e#nf zAD(xt+mg@uLqjX#u36KR*}pkcs%>pP>z&^>R^_zbvCd0yIPj#0>+}1DI+d+Il7HL&QM>#n z-e#BSx8dC7s=NEyyzFSd-HH4C!f){O`HETHUkFY1TEbtCnJSuKoPaFn8_UTek$} zK0JHv`p@`Jdw*+xwQHUKNWL|9PdtCC+DEf~@vgM~bKNf0KU!ZeGv1k?yQhb#h|Oa4 zF7>XGcwyU58Nc?nt@>s>JA3QKou0jW7OyGFRnO*oRgnGoeYcqTuJohP=h-r%wzqzb zO^^ThtoKp=X4NbG>u;>d&3@LlE#icLj)8UU!X*KMJ=L5E*-N(gdOy%V`d8Wgcivs= z2(z+j*EB1IW46wUwF!uhp1w^!GvePxl}!smraIu z{JrPy)cL30)@hwT_3g9WoQpMq%PQL5hi%=kOZd)9^Z2B_9gCHn&5lga^`9K|_CKosU3d*T(8LhV z{*n2iNxYE$!dU+!yPim~DgKUc*NwWFvwqJ`o1>p27iPA3Ft!veNLV1?Rut$`-^u#? zmq9(t{fYNrCg7q&W8J+sT~mnAfAe5(V`^V&-!s*TE*%dq9#H5#b!rWNfP?ZxR}sc6 z=n5#7RGuX=4|)m;BkhDf%$ximp7-T1w$`*=brTZ>+V0(qoSoEHnWj2plBdp*NuDZ` zG8Q$MzJNxJ*4k}TQ&TG|%k$IoS+73UzH?s0YrSPC*U}q28@U)HUoeOSI*63`b7acN zT={y|-EUKL_1nCc`KFxTjZKfN7IIlKO`Sm{W1$XlXO zw2e#$>(g1^aO)K7lpE@Fskrm?F|y=^g(g%rG0JplEZ`0abY%dYmM`ibc}cg%aY=k; z`Nb{U!Z+A%`M9Q6GUKk-2A(BGPlQh>^d0NcG?}$oDo^i)Nv_=K!?#@frY^hfp1M5Z zz_e@EF7l`ZDz%0(UtnN=7_PBs<#zQlFHdcSMO~UaphUSkd1+*3Z_%}~f+*7~#~u~4 fc(rAEFr4@yZ!EAtsPaKo=ql*aGN?!R|K9`v+D=2R literal 0 HcmV?d00001 -- GitLab From 7862d3c98aaba44f2811eb4657ff2c174414d7a1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 31 Aug 2017 01:08:32 -0700 Subject: [PATCH 142/294] Fixed errors in GPU crosstool in cuda_clang mode. The source of errors is that 'crosstool_wrapper_driver_is_not_gcc' was not included in the GPU crosstool, but was still listed in build dependencies. PiperOrigin-RevId: 167107311 --- third_party/gpus/crosstool/BUILD.tpl | 8 ++++---- third_party/gpus/cuda_configure.bzl | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl index 7d8b600513..98cb326572 100644 --- a/third_party/gpus/crosstool/BUILD.tpl +++ b/third_party/gpus/crosstool/BUILD.tpl @@ -12,12 +12,12 @@ cc_toolchain_suite( cc_toolchain( name = "cc-compiler-local", - all_files = ":crosstool_wrapper_driver_is_not_gcc", + all_files = "%{linker_files}", compiler_files = ":empty", cpu = "local", dwp_files = ":empty", dynamic_runtime_libs = [":empty"], - linker_files = ":crosstool_wrapper_driver_is_not_gcc", + linker_files = "%{linker_files}", objcopy_files = ":empty", static_runtime_libs = [":empty"], strip_files = ":empty", @@ -30,12 +30,12 @@ cc_toolchain( cc_toolchain( name = "cc-compiler-darwin", - all_files = ":crosstool_wrapper_driver_is_not_gcc", + all_files = "%{linker_files}", compiler_files = ":empty", cpu = "darwin", dwp_files = ":empty", dynamic_runtime_libs = [":empty"], - linker_files = ":crosstool_wrapper_driver_is_not_gcc", + linker_files = "%{linker_files}", objcopy_files = ":empty", static_runtime_libs = [":empty"], strip_files = ":empty", diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index b85e565f36..4a0f471088 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -971,7 +971,6 @@ def _create_local_cuda_repository(repository_ctx): ' ":cudnn-include",') }) # Set up crosstool/ - _file(repository_ctx, "crosstool:BUILD") cc = find_cc(repository_ctx) host_compiler_includes = _host_compiler_includes(repository_ctx, cc) cuda_defines = { @@ -981,11 +980,14 @@ def _create_local_cuda_repository(repository_ctx): } if _use_cuda_clang(repository_ctx): cuda_defines["%{clang_path}"] = cc + _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty"}) _tpl(repository_ctx, "crosstool:CROSSTOOL_clang", cuda_defines, out="crosstool/CROSSTOOL") else: nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" % (cuda_config.cuda_toolkit_path, ".exe" if cuda_config.cpu_value == "Windows" else ""))) + _tpl(repository_ctx, "crosstool:BUILD", + {"%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc"}) _tpl(repository_ctx, "crosstool:CROSSTOOL_nvcc", cuda_defines, out="crosstool/CROSSTOOL") _tpl(repository_ctx, "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc", -- GitLab From 8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 31 Aug 2017 09:46:52 -0700 Subject: [PATCH 143/294] [XLA] Sanity check the list of called computations for fusion nodes called_compuatations for a fusion node should only include the fusion computation that it calls. PiperOrigin-RevId: 167149669 --- .../compiler/xla/service/hlo_instruction.cc | 7 ------- .../compiler/xla/service/hlo_instruction.h | 3 +-- .../xla/service/hlo_instruction_test.cc | 9 +++------ .../xla/service/hlo_rematerialization.cc | 3 ++- .../compiler/xla/service/hlo_verifier.cc | 19 +++++++++++++++++++ 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 3bdb67ba92..75b88aeb12 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -793,13 +793,6 @@ HloInstruction* HloInstruction::CloneAndFuseInternal( } } - for (HloComputation* computation : - instruction_to_fuse->called_computations()) { - if (std::find(called_computations_.begin(), called_computations_.end(), - computation) == called_computations_.end()) { - called_computations_.push_back(computation); - } - } VLOG(2) << "New clone:\n" << clone->ToString(); return clone; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 5688fcc425..e393e05c34 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -797,8 +797,7 @@ class HloInstruction { const Shape& shape, tensorflow::gtl::ArraySlice operands); - // Returns the computations this instruction calls (if any). This includes - // computations called by fused instructions inside of a fusion instruction. + // Returns the computations this instruction directly calls (if any). const std::vector& called_computations() const { return called_computations_; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index ea5749581b..2e1eeee36b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -758,16 +758,13 @@ TEST_F(HloInstructionTest, FusionOpWithCalledComputations) { auto* fusion = computation->CreateFusionInstruction( {map_3_y}, HloInstruction::FusionKind::kLoop); auto* fused_computation = fusion->fused_instructions_computation(); - EXPECT_THAT(fusion->called_computations(), - ElementsAre(fused_computation, computation_y)); + EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation)); fusion->FuseInstruction(map_2_x); - EXPECT_THAT(fusion->called_computations(), - ElementsAre(fused_computation, computation_y, computation_x)); + EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation)); fusion->FuseInstruction(map_1_x); - EXPECT_THAT(fusion->called_computations(), - ElementsAre(fused_computation, computation_y, computation_x)); + EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation)); } TEST_F(HloInstructionTest, ComplexFusionOp) { diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 278a1d7efa..20152cf0ce 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -1248,7 +1248,8 @@ StatusOr HloRematerialization::Run( sequence->at(node.computation()))); } return Status::OK(); - })); + }, + /*visit_unreachable_nodes=*/false)); // The peak memory usage of the module equals the peak memory use of the entry // computation plus the output size of the computation. This is because the diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index eb6fe5d2e3..c44be716cd 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -280,6 +280,14 @@ class ShapeVerifier : public DfsHloVisitor { const std::function shape_size_fn_; }; +string ComputationsToString( + tensorflow::gtl::ArraySlice computations) { + return tensorflow::str_util::Join( + computations, ",", [](string* s, const HloComputation* computation) { + s->append(computation->name()); + }); +} + } // namespace StatusOr HloVerifier::Run(HloModule* module) { @@ -290,6 +298,17 @@ StatusOr HloVerifier::Run(HloModule* module) { for (const auto& instruction : computation->instructions()) { TF_RET_CHECK(instruction->parent() == computation.get()); if (instruction->opcode() == HloOpcode::kFusion) { + TF_RET_CHECK( + ContainersEqual(instruction->called_computations(), + {instruction->fused_instructions_computation()})) + << "Fusion HLO calls computations other than the " + "fused_instructions_computation: " + << instruction->ToString() + << " instruction->fused_instructions_computation(): " + << instruction->fused_instructions_computation()->ToString() + << " instruction->called_computations(): " + << ComputationsToString(instruction->called_computations()); + for (const auto& fused : instruction->fused_instructions()) { TF_RET_CHECK(fused->parent() == instruction->fused_instructions_computation()) -- GitLab From 059c68457a00463146613c0751cb9b16eab28888 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 31 Aug 2017 10:33:32 -0700 Subject: [PATCH 144/294] [TF:XLA] Implement SoftSign, SoftSignGrad, ReciprocalGrad, ApproximateEqual, Rint, IsFinite, IsInf, IsNan. Enable L2Loss test case that apparently passes now. PiperOrigin-RevId: 167156124 --- tensorflow/compiler/tests/binary_ops_test.py | 19 +++++++ tensorflow/compiler/tests/randomized_tests.cc | 51 +++++++++++++++++-- tensorflow/compiler/tests/unary_ops_test.py | 40 ++++++++++++++- tensorflow/compiler/tf2xla/kernels/BUILD | 1 - .../compiler/tf2xla/kernels/binary_ops.cc | 25 +++++++++ .../compiler/tf2xla/kernels/is_finite_op.cc | 43 ---------------- .../compiler/tf2xla/kernels/unary_ops.cc | 25 ++++++--- tensorflow/core/ops/nn_ops.cc | 2 +- 8 files changed, 148 insertions(+), 58 deletions(-) delete mode 100644 tensorflow/compiler/tf2xla/kernels/is_finite_op.cc diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index e349aefd4c..e6862f0d9d 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -52,6 +52,12 @@ class BinaryOpsTest(XLATestCase): def testFloatOps(self): for dtype in self.float_types: + self._testBinary( + lambda x, y: math_ops.approximate_equal(x, y, tolerance=0.0001), + np.array([[[[-1, 2.00009999], [-3, 4.01]]]], dtype=dtype), + np.array([[[[-1.001, 2], [-3.00009, 4]]]], dtype=dtype), + expected=np.array([[[[False, True], [True, False]]]], dtype=dtype)) + self._testBinary( gen_math_ops._real_div, np.array([3, 3, -1.5, -8, 44], dtype=dtype), @@ -82,6 +88,12 @@ class BinaryOpsTest(XLATestCase): dtype(4), expected=np.array([[16], [81]], dtype=dtype)) + self._testBinary( + gen_math_ops._reciprocal_grad, + np.array([4, -3, -2, 1], dtype=dtype), + np.array([5, -6, 7, -8], dtype=dtype), + expected=np.array([-80, 54, -28, 8], dtype=dtype)) + self._testBinary( gen_math_ops._sigmoid_grad, np.array([4, 3, 2, 1], dtype=dtype), @@ -107,6 +119,13 @@ class BinaryOpsTest(XLATestCase): expected=np.array( [3.97322869, 2.99258232, 1.99817801, 0.99966466], dtype=dtype)) + self._testBinary( + gen_nn_ops._softsign_grad, + np.array([4, 3, 2, 1], dtype=dtype), + np.array([5, 6, 7, 8], dtype=dtype), + expected=np.array( + [0.11111111, 0.06122449, 0.03125, 0.01234568], dtype=dtype)) + self._testBinary( gen_math_ops._tanh_grad, np.array([4, 3, 2, 1], dtype=dtype), diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc index a342e37e0e..49c1699b6e 100644 --- a/tensorflow/compiler/tests/randomized_tests.cc +++ b/tensorflow/compiler/tests/randomized_tests.cc @@ -888,6 +888,16 @@ TEST_F(OpTest, Any) { }); } +TEST_F(OpTest, ApproximateEqual) { + Repeatedly([this]() { + auto dims = RandomDims(); + return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("ApproximateEqual") + .RandomInput(DT_FLOAT, dims) + .RandomInput(DT_FLOAT, dims) + .Attr("T", DT_FLOAT)); + }); +} + TEST_F(OpTest, Asinh) { Repeatedly([this]() { return ExpectTfAndXlaOutputsAreClose( @@ -1662,11 +1672,9 @@ TEST_F(OpTest, GreaterEqual) { TEST_F(OpTest, L2Loss) { Repeatedly([this]() { - DataType type = Choose({DT_INT32, DT_FLOAT}); - // TODO(b/31644876): scalars currently crash. - return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("L2Loss") - .RandomInput(type, RandomDims(1)) - .Attr("T", type)); + DataType type = DT_FLOAT; + return ExpectTfAndXlaOutputsAreClose( + OpTestBuilder("L2Loss").RandomInput(type).Attr("T", type)); }); } @@ -2165,6 +2173,15 @@ TEST_F(OpTest, Reciprocal) { }); } +TEST_F(OpTest, ReciprocalGrad) { + Repeatedly([this]() { + std::vector dims = RandomDims(); + return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("ReciprocalGrad") + .RandomInput(DT_FLOAT, dims) + .RandomInput(DT_FLOAT, dims) + .Attr("T", DT_FLOAT)); + }); +} TEST_F(OpTest, Relu) { Repeatedly([this]() { return ExpectTfAndXlaOutputsAreClose( @@ -2250,6 +2267,13 @@ TEST_F(OpTest, ReverseV2) { }); } +TEST_F(OpTest, Rint) { + Repeatedly([this]() { + return ExpectTfAndXlaOutputsAreClose( + OpTestBuilder("Rint").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT)); + }); +} + TEST_F(OpTest, Round) { Repeatedly([this]() { return ExpectTfAndXlaOutputsAreClose( @@ -2402,6 +2426,23 @@ TEST_F(OpTest, SoftplusGrad) { }); } +TEST_F(OpTest, Softsign) { + Repeatedly([this]() { + return ExpectTfAndXlaOutputsAreClose( + OpTestBuilder("Softsign").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT)); + }); +} + +TEST_F(OpTest, SoftsignGrad) { + Repeatedly([this]() { + std::vector dims = RandomDims(); + return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("SoftsignGrad") + .RandomInput(DT_FLOAT, dims) + .RandomInput(DT_FLOAT, dims) + .Attr("T", DT_FLOAT)); + }); +} + TEST_F(OpTest, SpaceToBatch) { Repeatedly([this]() { std::vector block_dims = RandomDims(4, 4, 0, 5); diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index ca2a438005..b21f1998a5 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import unittest + import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin @@ -161,12 +163,17 @@ class UnaryOpsTest(XLATestCase): np.array([[-1.7, 1.2]], dtype=dtype), expected=np.array([[-2, 1]], dtype=dtype)) + self._assertOpOutputMatchesExpected( + math_ops.is_finite, + np.array([[np.NINF, -2, -1, 0, 0.5, 1, 2, np.inf, np.nan]], + dtype=dtype), + expected=np.array([[0, 1, 1, 1, 1, 1, 1, 0, 0]], dtype=np.bool)) + # Tests for tf.nn ops. self._assertOpOutputMatchesExpected( nn_ops.l2_loss, np.array([[[]]], dtype=dtype), expected=dtype(0)) - # TODO(b/31644876): enable this test case when fixed. - # self._assertOpOutputMatchesExpected(tf.nn.l2_loss, dtype(4), dtype(10)) + self._assertOpOutputMatchesExpected(nn_ops.l2_loss, dtype(4), dtype(8)) self._assertOpOutputMatchesExpected( nn_ops.l2_loss, np.array([[-2, 4]], dtype=dtype), expected=dtype(10)) @@ -198,6 +205,12 @@ class UnaryOpsTest(XLATestCase): np.array([[1e-14, 1e-15, 0.6]], dtype=dtype), expected=np.log1p(np.array([[1e-14, 1e-15, 0.6]], dtype=dtype))) + self._assertOpOutputMatchesExpected( + math_ops.rint, + np.array([[-1.7, 1.2, 4.0, 0.0], [-3.5, -2.5, -1.5, -0.5], + [0.5, 1.5, 2.5, 3.5]], dtype=dtype), + expected=np.array([[-2, 1, 4, 0], [-4, -2, -2, 0], [0, 2, 2, 4]], + dtype=dtype)) self._assertOpOutputMatchesExpected( math_ops.round, np.array([[-1.7, 1.2, 4.0, 0.0], [-3.5, -2.5, -1.5, -0.5], @@ -301,6 +314,12 @@ class UnaryOpsTest(XLATestCase): np.array([[-2, 0, 8]], dtype=dtype), expected=np.array([[0.126928, 0.6931472, 8.0003354]], dtype=dtype)) + self._assertOpOutputMatchesExpected( + nn_ops.softsign, + np.array([[-2, -1, 0, 1, 2]], dtype=dtype), + expected=np.array([[-0.66666669, -0.5, 0, 0.5, 0.66666669]], + dtype=dtype)) + self._assertOpOutputMatchesExpected( math_ops.is_finite, np.array( @@ -335,6 +354,23 @@ class UnaryOpsTest(XLATestCase): np.array([[4, 3], [2, 1]], dtype=dtype), expected=np.array([[1, 1], [1, 1]], dtype=dtype)) + # TODO(phawkins): these tests fail unless fastmath optimizations + # are disabled. Use more robust IsInf/IsNaN detection and enable these + # tests. + @unittest.skip("test case fails in fast-math mode") + def testIsInfAndIsNan(self): + for dtype in self.float_types: + self._assertOpOutputMatchesExpected( + math_ops.is_inf, + np.array([[np.NINF, -2, -1, 0, 0.5, 1, 2, np.inf, np.nan]], + dtype=dtype), + expected=np.array([[1, 0, 0, 0, 0, 0, 0, 1, 0]], dtype=np.bool)) + self._assertOpOutputMatchesExpected( + math_ops.is_nan, + np.array([[np.NINF, -2, -1, 0, 0.5, 1, 2, np.inf, np.nan]], + dtype=dtype), + expected=np.array([[0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=np.bool)) + def testLogicalOps(self): self._assertOpOutputMatchesExpected( math_ops.logical_not, diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index d09e721c93..6e6c5dc17f 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -31,7 +31,6 @@ tf_kernel_library( "function_ops.cc", "gather_op.cc", "identity_op.cc", - "is_finite_op.cc", "l2loss_op.cc", "lrn_ops.cc", "matmul_op.cc", diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc index f9bb1e2fb1..58538b4513 100644 --- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc @@ -102,6 +102,7 @@ XLA_MAKE_BINARY(Mod, b->Rem(lhs, rhs, extend_dimensions)); XLA_MAKE_BINARY(Maximum, b->Max(lhs, rhs, extend_dimensions)); XLA_MAKE_BINARY(Minimum, b->Min(lhs, rhs, extend_dimensions)); XLA_MAKE_BINARY(RealDiv, b->Div(lhs, rhs, extend_dimensions)); +XLA_MAKE_BINARY(ReciprocalGrad, b->Neg(b->Mul(rhs, b->Mul(lhs, lhs)))); XLA_MAKE_BINARY( RsqrtGrad, b->Mul(b->Pow(lhs, XlaHelpers::IntegerLiteral(b, input_type(0), 3)), @@ -140,6 +141,11 @@ XLA_MAKE_BINARY(SoftplusGrad, b->Div(lhs, b->Add(b->Exp(b->Neg(rhs)), XlaHelpers::One(b, input_type(1))))); +// softsigngrad(gradients, features) = gradients / (1 + abs(features)) ** 2 +XLA_MAKE_BINARY(SoftsignGrad, + b->Div(lhs, Square(b, b->Add(XlaHelpers::One(b, input_type(0)), + b->Abs(rhs))))); + XLA_MAKE_BINARY(TanhGrad, b->Mul(rhs, b->Sub(XlaHelpers::One(b, input_type(0)), b->Mul(lhs, lhs)))); @@ -147,5 +153,24 @@ XLA_MAKE_BINARY(Pow, b->Pow(lhs, rhs, extend_dimensions)); #undef XLA_MAKE_BINARY +class ApproximateEqualOp : public XlaOpKernel { + public: + explicit ApproximateEqualOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("tolerance", &tolerance_)); + } + + // Computes the max of the scalar input x and 0. + void Compile(XlaOpKernelContext* ctx) override { + xla::ComputationBuilder* b = ctx->builder(); + auto result = b->Lt(b->Abs(b->Sub(ctx->Input(0), ctx->Input(1))), + XlaHelpers::FloatLiteral(b, input_type(0), tolerance_)); + ctx->SetOutput(0, result); + } + + private: + float tolerance_; +}; +REGISTER_XLA_OP(Name("ApproximateEqual"), ApproximateEqualOp); + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/is_finite_op.cc b/tensorflow/compiler/tf2xla/kernels/is_finite_op.cc deleted file mode 100644 index 788dcee544..0000000000 --- a/tensorflow/compiler/tf2xla/kernels/is_finite_op.cc +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/tf2xla/xla_helpers.h" -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" -#include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/literal_util.h" -#include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/bcast.h" - -namespace tensorflow { -namespace { - -class IsFiniteOp : public XlaOpKernel { - public: - explicit IsFiniteOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} - - void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationDataHandle input = ctx->Input(0); - ctx->SetOutput(0, ctx->builder()->IsFinite(input)); - } - - private: - TF_DISALLOW_COPY_AND_ASSIGN(IsFiniteOp); -}; - -REGISTER_XLA_OP(Name("IsFinite"), IsFiniteOp); - -} // anonymous namespace -} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc index 7b39f0533b..6b8f5ec7b3 100644 --- a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc @@ -73,8 +73,12 @@ XLAJIT_MAKE_UNARY(Exp, b->Exp(x)); XLAJIT_MAKE_UNARY(Expm1, b->Sub(b->Exp(x), XlaHelpers::One(b, input_type(0)))); XLAJIT_MAKE_UNARY(Floor, b->Floor(x)); -// Returns 0 if x is 0, -1 if x < 0 and 1 if x > 0. -XLAJIT_MAKE_UNARY(Sign, b->Sign(x)); +XLAJIT_MAKE_UNARY(IsFinite, b->IsFinite(x)); +XLAJIT_MAKE_UNARY(IsInf, b->Eq(b->Abs(x), + XlaHelpers::FloatLiteral( + b, input_type(0), + std::numeric_limits::infinity()))); +XLAJIT_MAKE_UNARY(IsNan, b->Ne(x, x)); // Return 1/x XLAJIT_MAKE_UNARY(Inv, b->Div(XlaHelpers::One(b, input_type(0)), x)); XLAJIT_MAKE_UNARY(Reciprocal, b->Div(XlaHelpers::One(b, input_type(0)), x)); @@ -105,6 +109,12 @@ static xla::ComputationDataHandle Round(xla::ComputationBuilder* b, b->Add(round_val, one), round_val); } +XLAJIT_MAKE_UNARY(Rint, Round(b, input_type(0), x)); +XLAJIT_MAKE_UNARY(Round, Round(b, input_type(0), x)); + +XLAJIT_MAKE_UNARY(Rsqrt, + b->Pow(x, XlaHelpers::FloatLiteral(b, input_type(0), -0.5))); + // Expresses sigmoid as a rescaled tanh: sigmoid(x) == (tanh(x/2) + 1) / 2. static xla::ComputationDataHandle Sigmoid(xla::ComputationBuilder* b, DataType dtype, @@ -112,16 +122,19 @@ static xla::ComputationDataHandle Sigmoid(xla::ComputationBuilder* b, auto half = XlaHelpers::FloatLiteral(b, dtype, 0.5); return b->Add(half, b->Mul(half, b->Tanh(b->Mul(half, x)))); } - -XLAJIT_MAKE_UNARY(Round, Round(b, input_type(0), x)); -XLAJIT_MAKE_UNARY(Rsqrt, - b->Pow(x, XlaHelpers::FloatLiteral(b, input_type(0), -0.5))); XLAJIT_MAKE_UNARY(Sigmoid, Sigmoid(b, input_type(0), x)); + +// Returns 0 if x is 0, -1 if x < 0 and 1 if x > 0. +XLAJIT_MAKE_UNARY(Sign, b->Sign(x)); XLAJIT_MAKE_UNARY(Sinh, b->Mul(b->Sub(b->Exp(x), b->Exp(b->Neg(x))), XlaHelpers::FloatLiteral(b, input_type(0), 0.5))); XLAJIT_MAKE_UNARY(Softplus, b->Log(b->Add(b->Exp(x), XlaHelpers::One(b, input_type(0))))); +// softsign(x) = x / (abs(x) + 1) +XLAJIT_MAKE_UNARY(Softsign, + b->Div(x, + b->Add(b->Abs(x), XlaHelpers::One(b, input_type(0))))); XLAJIT_MAKE_UNARY(Sqrt, b->Pow(x, XlaHelpers::FloatLiteral(b, input_type(0), 0.5))); XLAJIT_MAKE_UNARY(Square, b->Mul(x, x)); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 0a96258dd1..1ab1f1a736 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1945,7 +1945,7 @@ Computes softsign gradients for a softsign operation. gradients: The backpropagated gradients to the corresponding softsign operation. features: The features passed as input to the corresponding softsign operation. -backprops: The gradients: `gradients / (1 + abs(-features)) ** 2`. +backprops: The gradients: `gradients / (1 + abs(features)) ** 2`. )doc"); // -------------------------------------------------------------------------- -- GitLab From 8dbd2b91f9b387e6f3b0084671e46325fc9915be Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 31 Aug 2017 10:51:13 -0700 Subject: [PATCH 145/294] Update ops-related pbtxt files. PiperOrigin-RevId: 167158999 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 63b7532b33..3a28ce3767 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -24686,7 +24686,7 @@ op { } output_arg { name: "backprops" - description: "The gradients: `gradients / (1 + abs(-features)) ** 2`." + description: "The gradients: `gradients / (1 + abs(features)) ** 2`." type_attr: "T" } attr { -- GitLab From 863163067ad38e093ae7c4557c5065f589eef70c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 31 Aug 2017 11:00:15 -0700 Subject: [PATCH 146/294] Introducing tf.contrib.receptive_field PiperOrigin-RevId: 167160384 --- tensorflow/BUILD | 1 + tensorflow/contrib/receptive_field/BUILD | 71 +++ tensorflow/contrib/receptive_field/README.md | 164 ++++++ .../contrib/receptive_field/__init__.py | 23 + .../receptive_field/python/__init__.py | 19 + .../python/util/examples/compute_rf.py | 90 ++++ .../python/util/examples/rf_benchmark.py | 460 +++++++++++++++++ .../write_inception_resnet_v2_graph.py | 57 +++ .../python/util/graph_compute_order.py | 88 ++++ .../python/util/receptive_field.py | 481 ++++++++++++++++++ .../python/util/receptive_field_test.py | 221 ++++++++ tensorflow/tools/pip_package/BUILD | 1 + 12 files changed, 1676 insertions(+) create mode 100644 tensorflow/contrib/receptive_field/BUILD create mode 100644 tensorflow/contrib/receptive_field/README.md create mode 100644 tensorflow/contrib/receptive_field/__init__.py create mode 100644 tensorflow/contrib/receptive_field/python/__init__.py create mode 100644 tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py create mode 100644 tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py create mode 100644 tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py create mode 100644 tensorflow/contrib/receptive_field/python/util/graph_compute_order.py create mode 100644 tensorflow/contrib/receptive_field/python/util/receptive_field.py create mode 100644 tensorflow/contrib/receptive_field/python/util/receptive_field_test.py diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 799f2a72d0..d14c593ade 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -315,6 +315,7 @@ filegroup( "//tensorflow/contrib/nn:all_files", "//tensorflow/contrib/opt:all_files", "//tensorflow/contrib/predictor:all_files", + "//tensorflow/contrib/receptive_field:all_files", "//tensorflow/contrib/reduce_slice_ops:all_files", "//tensorflow/contrib/remote_fused_graph/pylib:all_files", "//tensorflow/contrib/resampler:all_files", diff --git a/tensorflow/contrib/receptive_field/BUILD b/tensorflow/contrib/receptive_field/BUILD new file mode 100644 index 0000000000..ed2f3af08c --- /dev/null +++ b/tensorflow/contrib/receptive_field/BUILD @@ -0,0 +1,71 @@ +# Description: +# Contains modules to compute receptive field parameters for CNN models. + +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "py_test") + +# Transitive dependencies of this target will be included in the pip package. +py_library( + name = "receptive_field_pip", + deps = [ + ":graph_compute_order_py", + ":receptive_field_py", + ], +) + +py_library( + name = "graph_compute_order_py", + srcs = [ + "__init__.py", + "python/util/graph_compute_order.py", + ], + srcs_version = "PY2AND3", +) + +py_library( + name = "receptive_field_py", + srcs = [ + "__init__.py", + "python/util/receptive_field.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":graph_compute_order_py", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + ], +) + +py_test( + name = "receptive_field_test", + srcs = ["python/util/receptive_field_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":receptive_field_py", + "//tensorflow/contrib/framework:framework_py", + "//tensorflow/contrib/slim", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:nn", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/receptive_field/README.md b/tensorflow/contrib/receptive_field/README.md new file mode 100644 index 0000000000..f7539ec145 --- /dev/null +++ b/tensorflow/contrib/receptive_field/README.md @@ -0,0 +1,164 @@ +# Receptive field computation for convnets + +This library enables you to easily compute the receptive field parameters of +your favorite convnet. You can use it to understand how big of an input image +region your output features depend on. Better yet, using the parameters computed +by the library, you can easily find the exact image region which is used to +compute each convnet feature. + +## Basic usage + +The main function to be called is `compute_receptive_field_from_graph_def`, +which will return the receptive field, effective stride and effective padding +for both horizontal and vertical directions. + +For example, if your model is constructed using the function +`my_model_construction()`, you can use the library as follows: + +```python +import tensorflow as tf +from tensorflow.contrib import receptive_field + +# Construct graph. +g = tf.Graph() +with g.as_default(): + images = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='input_image') + my_model_construction(images) + +# Compute receptive field parameters. +rf_x, rf_y, eff_stride_x, eff_stride_y, eff_pad_x, eff_pad_y = \ + receptive_field.compute_receptive_field_from_graph_def( \ + g.as_graph_def(), 'input_image', 'my_output_endpoint') +``` + +Here's a simple example of computing the receptive field parameters for +Inception-Resnet-v2. To get this to work, be sure to checkout +[tensorflow/models](https://github.com/tensorflow/models), so that the Inception +models are available to you. This can be done in three simple commands: + +```sh +git clone https://github.com/tensorflow/models +cd models/slim +sudo python setup.py install_lib +``` + +You can then compute the receptive field parameters for Inception-Resnet-v2 as: + +```python +from nets import inception +import tensorflow as tf +from tensorflow.contrib import receptive_field + +# Construct graph. +g = tf.Graph() +with g.as_default(): + images = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='input_image') + inception.inception_resnet_v2_base(images) + +# Compute receptive field parameters. +rf_x, rf_y, eff_stride_x, eff_stride_y, eff_pad_x, eff_pad_y = \ + receptive_field.compute_receptive_field_from_graph_def( \ + g.as_graph_def(), 'input_image', 'InceptionResnetV2/Conv2d_7b_1x1/Relu') +``` + +This will give you `rf_x = rf_y = 3039`, `eff_stride_x = eff_stride_y = 32`, and +`eff_pad_x = eff_pad_y = 1482`. This means that each feature that is output at +the node `'InceptionResnetV2/Conv2d_7b_1x1/Relu'` is computed from a region +which is of size `3039x3039`. Further, by using the expressions + +```python +center_x = -eff_pad_x + feature_x*eff_stride_x + (rf_x - 1)/2 +center_y = -eff_pad_y + feature_y*eff_stride_y + (rf_y - 1)/2 +``` + +one can compute the center of the region in the input image that is used to +compute the output feature at position `[feature_x, feature_y]`. For example, +the feature at position `[0, 2]` at the output of the layer +`'InceptionResnetV2/Conv2d_7b_1x1/Relu'` is centered in the original image in +the position `[37, 101]`. + +TODO: include link to derivations and definitions of different parameters. + +## Receptive field benchmark + +As you might expect, it is straightforward to run this library on the popular +convnets, and gather their receptive fields. We provide a python script which +does exactly that, available under `python/util/examples/rf_benchmark.py`. + +To get this to work, be sure to checkout +[tensorflow/models](https://github.com/tensorflow/models) (see the 3-command +instructions for this above). Then, simply: + +```sh +cd python/util/examples +python rf_benchmark.py --csv_path /tmp/rf_benchmark_results.csv +``` + +The script will write to stdout the receptive field parameters for many variants +of several popular convnets: AlexNet, VGG, ResNet, Inception, Mobilenet. They +are also written to the file `/tmp/rf_benchmark_results.csv`. + +TODO: include here a plot for receptive field sizes of different convnets. + +TODO: include table/link to pre-computed RF parameters. + +## Compute RF parameters from a graph pbtxt + +We also provide a utility to compute the receptive field parameters directly +from a graph protobuf file. + +Have a `graph.pbtxt` file and want to compute its receptive field parameters? We +got you covered. The only prerequisite is to install +[google/protobuf](https://github.com/google/protobuf), which you probably +already have if you're using tensorflow (otherwise, follow installation +instructions [here](https://github.com/google/protobuf/tree/master/python)). + +This should work: + +```sh +cd python/util/examples +python compute_rf.py \ + --graph_path /path/to/graph.pbtxt \ + --output_path /path/to/output/rf_info.txt \ + --input_node my_input_node \ + --output_node my_output_node +``` + +Don't know how to generate a graph protobuf file? Take a look at the +`write_inception_resnet_v2_graph.py` script, which shows how to save it for the +Inception-Resnet-v2 model: + +```sh +cd python/util/examples +python write_inception_resnet_v2_graph.py --graph_dir /tmp --graph_filename graph.pbtxt +``` + +This will write the Inception-Resnet-v2 graph protobuf to `/tmp/graph.pbtxt`. + +For completeness, here's how you would use this file to get the receptive field +parameters of the Inception-Resnet-v2 model: + +```sh +cd python/util/examples +python compute_rf.py \ + --graph_path /tmp/graph.pbtxt \ + --output_path /tmp/rf_info.txt \ + --input_node input_image \ + --output_node InceptionResnetV2/Conv2d_7b_1x1/Relu +``` + +This will write the receptive field parameters of the model to +`/tmp/rf_info.txt`, which will look like: + +```sh +Receptive field size (horizontal) = 3039 +Receptive field size (vertical) = 3039 +Effective stride (horizontal) = 32 +Effective stride (vertical) = 32 +Effective padding (horizontal) = 1482 +Effective padding (vertical) = 1482 +``` + +## Authors + +André Araujo (andrefaraujo@) and Mark Sandler diff --git a/tensorflow/contrib/receptive_field/__init__.py b/tensorflow/contrib/receptive_field/__init__.py new file mode 100644 index 0000000000..10745a6a53 --- /dev/null +++ b/tensorflow/contrib/receptive_field/__init__.py @@ -0,0 +1,23 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Module to compute receptive field parameters for CNN tensorflow models.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=unused-import +from tensorflow.contrib.receptive_field.python.util.graph_compute_order import get_compute_order +from tensorflow.contrib.receptive_field.python.util.receptive_field import compute_receptive_field_from_graph_def +# pylint: enable=unused-import diff --git a/tensorflow/contrib/receptive_field/python/__init__.py b/tensorflow/contrib/receptive_field/python/__init__.py new file mode 100644 index 0000000000..217047f92d --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Module to compute receptive field parameters for CNN tensorflow models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function diff --git a/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py new file mode 100644 index 0000000000..70a0d11dff --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py @@ -0,0 +1,90 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Computes Receptive Field (RF) information given a graph protobuf. + +For an example of usage, see accompanying file compute_rf.sh +""" + +import argparse +import sys + +from google.protobuf import text_format + +from tensorflow.contrib import receptive_field +from tensorflow.core.framework import graph_pb2 +from tensorflow.python.platform import app +from tensorflow.python.platform import gfile +from tensorflow.python.platform import tf_logging as logging + +cmd_args = None + + +def _load_graphdef(path): + """Helper function to load GraphDef from file. + + Args: + path: Path to pbtxt file. + + Returns: + graph_def: A GraphDef object. + """ + graph_def = graph_pb2.GraphDef() + pbstr = gfile.Open(path).read() + text_format.Parse(pbstr, graph_def) + return graph_def + + +def main(unused_argv): + + graph_def = _load_graphdef(cmd_args.graph_path) + + (receptive_field_x, receptive_field_y, effective_stride_x, effective_stride_y, + effective_padding_x, effective_padding_y + ) = receptive_field.compute_receptive_field_from_graph_def( + graph_def, cmd_args.input_node, cmd_args.output_node) + + logging.info('Receptive field size (horizontal) = %s', receptive_field_x) + logging.info('Receptive field size (vertical) = %s', receptive_field_y) + logging.info('Effective stride (horizontal) = %s', effective_stride_x) + logging.info('Effective stride (vertical) = %s', effective_stride_y) + logging.info('Effective padding (horizontal) = %s', effective_padding_x) + logging.info('Effective padding (vertical) = %s', effective_padding_y) + + f = gfile.GFile('%s' % cmd_args.output_path, 'w') + f.write('Receptive field size (horizontal) = %s\n' % receptive_field_x) + f.write('Receptive field size (vertical) = %s\n' % receptive_field_y) + f.write('Effective stride (horizontal) = %s\n' % effective_stride_x) + f.write('Effective stride (vertical) = %s\n' % effective_stride_y) + f.write('Effective padding (horizontal) = %s\n' % effective_padding_x) + f.write('Effective padding (vertical) = %s\n' % effective_padding_y) + f.close() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.register('type', 'bool', lambda v: v.lower() == 'true') + parser.add_argument( + '--graph_path', type=str, default='', help='Graph path (pbtxt format).') + parser.add_argument( + '--output_path', + type=str, + default='', + help='Path to output text file where RF information will be written to.') + parser.add_argument( + '--input_node', type=str, default='', help='Name of input node.') + parser.add_argument( + '--output_node', type=str, default='', help='Name of output node.') + cmd_args, unparsed = parser.parse_known_args() + app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py b/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py new file mode 100644 index 0000000000..94228dfa61 --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py @@ -0,0 +1,460 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Computes Receptive Field (RF) information for different models. + +The receptive field (and related parameters) for the different models are +printed to stdout, and may also optionally be written to a CSV file. + +For an example of usage, see rf_benchmark.sh +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import csv +import sys + +from nets import alexnet +from nets import inception +from nets import mobilenet_v1 +from nets import resnet_v1 +from nets import resnet_v2 +from nets import vgg +from tensorflow.contrib import framework +from tensorflow.contrib import receptive_field +from tensorflow.contrib import slim +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import app + +cmd_args = None + +# Input node name for all architectures. +_INPUT_NODE = 'input_image' + +# Variants of different network architectures. + +# - resnet: different versions and sizes. +_SUPPORTED_RESNET_VARIANTS = [ + 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v1_200', + 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'resnet_v2_200' +] + +# - inception_resnet_v2: default, and version with SAME padding. +_SUPPORTED_INCEPTIONRESNETV2_VARIANTS = [ + 'inception_resnet_v2', 'inception_resnet_v2-same' +] + +# - inception_v2: default, and version with no separable conv. +_SUPPORTED_INCEPTIONV2_VARIANTS = [ + 'inception_v2', 'inception_v2-no-separable-conv' +] + +# - inception_v3: default version. +_SUPPORTED_INCEPTIONV3_VARIANTS = ['inception_v3'] + +# - inception_v4: default version. +_SUPPORTED_INCEPTIONV4_VARIANTS = ['inception_v4'] + +# - alexnet_v2: default version. +_SUPPORTED_ALEXNETV2_VARIANTS = ['alexnet_v2'] + +# - vgg: vgg_a (with 11 layers) and vgg_16 (version D). +_SUPPORTED_VGG_VARIANTS = ['vgg_a', 'vgg_16'] + +# - mobilenet_v1: 100% and 75%. +_SUPPORTED_MOBILENETV1_VARIANTS = ['mobilenet_v1', 'mobilenet_v1_075'] + + +def _construct_model(model_type='resnet_v1_50'): + """Constructs model for the desired type of CNN. + + Args: + model_type: Type of model to be used. + + Returns: + end_points: A dictionary from components of the network to the corresponding + activations. + + Raises: + ValueError: If the model_type is not supported. + """ + # Placeholder input. + images = array_ops.placeholder( + dtypes.float32, shape=(1, None, None, 3), name=_INPUT_NODE) + + # Construct model. + if model_type == 'inception_resnet_v2': + _, end_points = inception.inception_resnet_v2_base(images) + elif model_type == 'inception_resnet_v2-same': + _, end_points = inception.inception_resnet_v2_base( + images, align_feature_maps=True) + elif model_type == 'inception_v2': + _, end_points = inception.inception_v2_base(images) + elif model_type == 'inception_v2-no-separable-conv': + _, end_points = inception.inception_v2_base( + images, use_separable_conv=False) + elif model_type == 'inception_v3': + _, end_points = inception.inception_v3_base(images) + elif model_type == 'inception_v4': + _, end_points = inception.inception_v4_base(images) + elif model_type == 'alexnet_v2': + _, end_points = alexnet.alexnet_v2(images) + elif model_type == 'vgg_a': + _, end_points = vgg.vgg_a(images) + elif model_type == 'vgg_16': + _, end_points = vgg.vgg_16(images) + elif model_type == 'mobilenet_v1': + _, end_points = mobilenet_v1.mobilenet_v1_base(images) + elif model_type == 'mobilenet_v1_075': + _, end_points = mobilenet_v1.mobilenet_v1_base( + images, depth_multiplier=0.75) + elif model_type == 'resnet_v1_50': + _, end_points = resnet_v1.resnet_v1_50( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v1_101': + _, end_points = resnet_v1.resnet_v1_101( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v1_152': + _, end_points = resnet_v1.resnet_v1_152( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v1_200': + _, end_points = resnet_v1.resnet_v1_200( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v2_50': + _, end_points = resnet_v2.resnet_v2_50( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v2_101': + _, end_points = resnet_v2.resnet_v2_101( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v2_152': + _, end_points = resnet_v2.resnet_v2_152( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v2_200': + _, end_points = resnet_v2.resnet_v2_200( + images, num_classes=None, is_training=False, global_pool=False) + else: + raise ValueError('Unsupported model_type %s.' % model_type) + + return end_points + + +def _get_desired_end_point_keys(model_type='resnet_v1_50'): + """Gets list of desired end point keys for a type of CNN. + + Args: + model_type: Type of model to be used. + + Returns: + desired_end_point_types: A list containing the desired end-points. + + Raises: + ValueError: If the model_type is not supported. + """ + if model_type in _SUPPORTED_RESNET_VARIANTS: + blocks = ['block1', 'block2', 'block3', 'block4'] + desired_end_point_keys = ['%s/%s' % (model_type, i) for i in blocks] + elif model_type in _SUPPORTED_INCEPTIONRESNETV2_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3', + 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b', + 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1' + ] + elif model_type in _SUPPORTED_INCEPTIONV2_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', + 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b', + 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c' + ] + elif model_type in _SUPPORTED_INCEPTIONV3_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3', + 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b', + 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', + 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c' + ] + elif model_type in _SUPPORTED_INCEPTIONV4_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a', + 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_5e', + 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', 'Mixed_6f', + 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c', 'Mixed_7d' + ] + elif model_type in _SUPPORTED_ALEXNETV2_VARIANTS: + ep = ['conv1', 'pool1', 'conv2', 'conv3', 'conv4', 'conv5', 'pool5'] + desired_end_point_keys = ['%s/%s' % (model_type, i) for i in ep] + elif model_type in _SUPPORTED_VGG_VARIANTS: + ep = [ + 'conv1/conv1_1', 'pool1', 'conv2/conv2_1', 'pool2', 'conv3/conv3_1', + 'conv3/conv3_2', 'pool3', 'conv4/conv4_1', 'conv4/conv4_2', 'pool4', + 'conv5/conv5_1', 'conv5/conv5_2', 'pool5' + ] + desired_end_point_keys = ['%s/%s' % (model_type, i) for i in ep] + elif model_type in _SUPPORTED_MOBILENETV1_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_0', 'Conv2d_1_pointwise', 'Conv2d_2_pointwise', + 'Conv2d_3_pointwise', 'Conv2d_4_pointwise', 'Conv2d_5_pointwise', + 'Conv2d_6_pointwise', 'Conv2d_7_pointwise', 'Conv2d_8_pointwise', + 'Conv2d_9_pointwise', 'Conv2d_10_pointwise', 'Conv2d_11_pointwise', + 'Conv2d_12_pointwise', 'Conv2d_13_pointwise' + ] + else: + raise ValueError('Unsupported model_type %s.' % model_type) + + return desired_end_point_keys + + +def _model_graph_def(model_type='resnet_v1_50', arg_sc=None): + """Constructs a model graph, returning GraphDef and end-points. + + Args: + model_type: Type of model to be used. + arg_sc: Optional arg scope to use in constructing the graph. + + Returns: + graph_def: GraphDef of constructed graph. + end_points: A dictionary from components of the network to the corresponding + activations. + """ + if arg_sc is None: + arg_sc = {} + g = ops.Graph() + with g.as_default(): + with framework.arg_scope(arg_sc): + end_points = _construct_model(model_type) + + return g.as_graph_def(), end_points + + +def _model_rf(graphdef, + end_points, + desired_end_point_keys, + model_type='resnet_v1_50', + csv_writer=None): + """Computes receptive field information for a given CNN model. + + The information will be printed to stdout. If the RF parameters are the same + for the horizontal and vertical directions, it will be printed only once. + Otherwise, they are printed once for the horizontal and once for the vertical + directions. + + Args: + graphdef: GraphDef of given model. + end_points: A dictionary from components of the model to the corresponding + activations. + desired_end_point_keys: List of desired end points for which receptive field + information will be computed. + model_type: Type of model to be used, used only for printing purposes. + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for desired_end_point_key in desired_end_point_keys: + print('- %s:' % desired_end_point_key) + output_node_with_colon = end_points[desired_end_point_key].name + pos = output_node_with_colon.rfind(':') + output_node = output_node_with_colon[:pos] + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y + ) = receptive_field.compute_receptive_field_from_graph_def( + graphdef, _INPUT_NODE, output_node) + # If values are the same in horizontal/vertical directions, just report one + # of them. Otherwise, report both. + if (receptive_field_x == receptive_field_y) and ( + effective_stride_x == effective_stride_y) and ( + effective_padding_x == effective_padding_y): + print('Receptive field size = %5s, effective stride = %5s, effective ' + 'padding = %5s' % (str(receptive_field_x), str(effective_stride_x), + str(effective_padding_x))) + else: + print('Receptive field size: horizontal = %5s, vertical = %5s. ' + 'Effective stride: horizontal = %5s, vertical = %5s. Effective ' + 'padding: horizontal = %5s, vertical = %5s' % + (str(receptive_field_x), str(receptive_field_y), + str(effective_stride_x), str(effective_stride_y), + str(effective_padding_x), str(effective_padding_y))) + if csv_writer is not None: + csv_writer.writerow({ + 'CNN': model_type, + 'end_point': desired_end_point_key, + 'RF size hor': str(receptive_field_x), + 'RF size ver': str(receptive_field_y), + 'effective stride hor': str(effective_stride_x), + 'effective stride ver': str(effective_stride_y), + 'effective padding hor': str(effective_padding_x), + 'effective padding ver': str(effective_padding_y) + }) + + +def _process_model_rf(model_type='resnet_v1_50', csv_writer=None, arg_sc=None): + """Contructs model graph and desired end-points, and compute RF. + + The computed RF parameters are printed to stdout by the _model_rf function. + + Args: + model_type: Type of model to be used. + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + arg_sc: Optional arg scope to use in constructing the graph. + + """ + print('********************%s' % model_type) + graphdef, end_points = _model_graph_def(model_type, arg_sc) + desired_end_point_keys = _get_desired_end_point_keys(model_type) + _model_rf(graphdef, end_points, desired_end_point_keys, model_type, + csv_writer) + + +def _resnet_rf(csv_writer=None): + """Computes RF and associated parameters for resnet models. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_RESNET_VARIANTS: + arg_sc = resnet_v1.resnet_arg_scope() + _process_model_rf(model_type, csv_writer, arg_sc) + + +def _inception_resnet_v2_rf(csv_writer=None): + """Computes RF and associated parameters for the inception_resnet_v2 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_INCEPTIONRESNETV2_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _inception_v2_rf(csv_writer=None): + """Computes RF and associated parameters for the inception_v2 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_INCEPTIONV2_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _inception_v3_rf(csv_writer=None): + """Computes RF and associated parameters for the inception_v3 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_INCEPTIONV3_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _inception_v4_rf(csv_writer=None): + """Computes RF and associated parameters for the inception_v4 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_INCEPTIONV4_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _alexnet_v2_rf(csv_writer=None): + """Computes RF and associated parameters for the alexnet_v2 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_ALEXNETV2_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _vgg_rf(csv_writer=None): + """Computes RF and associated parameters for the vgg model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_VGG_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _mobilenet_v1_rf(csv_writer=None): + """Computes RF and associated parameters for the mobilenet_v1 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_MOBILENETV1_VARIANTS: + with slim.arg_scope( + [slim.batch_norm, slim.dropout], is_training=False) as arg_sc: + _process_model_rf(model_type, csv_writer, arg_sc) + + +def main(unused_argv): + # Configure CSV file which will be written, if desired. + if cmd_args.csv_path: + csv_file = open(cmd_args.csv_path, 'w') + field_names = [ + 'CNN', 'end_point', 'RF size hor', 'RF size ver', + 'effective stride hor', 'effective stride ver', 'effective padding hor', + 'effective padding ver' + ] + rf_writer = csv.DictWriter(csv_file, fieldnames=field_names) + rf_writer.writeheader() + else: + rf_writer = None + + # Compute RF parameters for each network architecture. + _alexnet_v2_rf(rf_writer) + _vgg_rf(rf_writer) + _inception_v2_rf(rf_writer) + _inception_v3_rf(rf_writer) + _inception_v4_rf(rf_writer) + _inception_resnet_v2_rf(rf_writer) + _mobilenet_v1_rf(rf_writer) + _resnet_rf(rf_writer) + + # Close CSV file, if it was opened. + if cmd_args.csv_path: + csv_file.close() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.register('type', 'bool', lambda v: v.lower() == 'true') + parser.add_argument( + '--csv_path', + type=str, + default='', + help="""\ + Path to CSV file that will be written with RF parameters.If empty, no + file will be written.\ + """) + cmd_args, unparsed = parser.parse_known_args() + app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py b/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py new file mode 100644 index 0000000000..a2384f66ce --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py @@ -0,0 +1,57 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Simple script to write Inception-ResNet-v2 model to graph file. +""" + +import argparse +import sys + +from nets import inception +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import graph_io +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import app + +cmd_args = None + + +def main(unused_argv): + # Model definition. + g = ops.Graph() + with g.as_default(): + images = array_ops.placeholder( + dtypes.float32, shape=(1, None, None, 3), name='input_image') + inception.inception_resnet_v2_base(images) + + graph_io.write_graph(g.as_graph_def(), cmd_args.graph_dir, + cmd_args.graph_filename) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.register('type', 'bool', lambda v: v.lower() == 'true') + parser.add_argument( + '--graph_dir', + type=str, + default='/tmp', + help='Directory where graph will be saved.') + parser.add_argument( + '--graph_filename', + type=str, + default='graph.pbtxt', + help='Filename of graph that will be saved.') + cmd_args, unparsed = parser.parse_known_args() + app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py new file mode 100644 index 0000000000..8af4be16d6 --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py @@ -0,0 +1,88 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Library to compute order of computations in a graph. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + + +class GraphDefHelper(object): + """Helper class to collect node names and definitions. + + Example: + b = GraphDefHelper(graph_def) + # Prints node that produces given output. + print b.output_of['conv/foo/bar'] + """ + + def __init__(self, gd): + self.output_of = {} + for each in gd.node: + self.output_of[each.name] = each + + +# pylint: disable=invalid-name +_NodeEntry = collections.namedtuple('NodeEntry', field_names=['order', 'node']) + + +def _get_computed_nodes(g, output, seen): + """Traverses the graph in topological order. + + Args: + g: GraphDefHelper object. + output: current node. + seen: map of nodes we've already traversed. + Returns: + order in topological sort for 'output'. + """ + if output in seen: + return seen[output].order + node_def = g.output_of.get(output, None) + if node_def is None: + seen[output] = _NodeEntry(0, None) + return 0 + + r = 0 + for each in node_def.input: + # Parses name of input node. + if each.startswith('^'): + each = each[1:] + each = each.split(':')[0] + # Recursively computes ordering. + new_v = _get_computed_nodes(g, each, seen) + r = max(r, new_v + 1) + + seen[output] = _NodeEntry(r, node_def) + + return seen[output].order + + +def get_compute_order(graph_def): + """Computes order of computation for a given graph. + + Args: + graph_def: GraphDef object. + Returns: + map: name -> {order, node} + """ + helper = GraphDefHelper(graph_def) + seen = collections.defaultdict(_NodeEntry) + for each in graph_def.node: + _get_computed_nodes(helper, each.name, seen) + return seen diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field.py b/tensorflow/contrib/receptive_field/python/util/receptive_field.py new file mode 100644 index 0000000000..4e723829bf --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/receptive_field.py @@ -0,0 +1,481 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functions to compute receptive field of a fully-convolutional network. + +Please refer to the following g3doc for detailed explanation on how this +computation is performed, and why it is important: +g3doc/photos/vision/features/delf/g3doc/rf_computation.md +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from tensorflow.contrib.receptive_field.python.util import graph_compute_order +from tensorflow.contrib.util import make_ndarray +from tensorflow.python.platform import tf_logging as logging + +# White-listed layer operations, which do not affect the receptive field +# computation. +_UNCHANGED_RF_LAYER_OPS = [ + "Softplus", "Relu", "BiasAdd", "Mul", "Add", "Const", "Identity", + "VariableV2", "Sub", "Rsqrt", "ConcatV2" +] + + +def _stride_size(node): + """Computes stride size given a TF node. + + Args: + node: Tensorflow node (NodeDef proto). + + Returns: + stride_x: Stride size for horizontal direction (integer). + stride_y: Stride size for vertical direction (integer). + """ + strides_attr = node.attr["strides"] + logging.vlog(4, "strides_attr = %s", strides_attr) + stride_y = strides_attr.list.i[1] + stride_x = strides_attr.list.i[2] + return stride_x, stride_y + + +def _conv_kernel_size(node, name_to_order_node): + """Computes kernel size given a TF convolution or pooling node. + + Args: + node: Tensorflow node (NodeDef proto). + name_to_order_node: Map from name to {order, node}. Output of + graph_compute_order.get_compute_order(). + + Returns: + kernel_size_x: Kernel size for horizontal direction (integer). + kernel_size_y: Kernel size for vertical direction (integer). + + Raises: + ValueError: If the weight layer node is invalid. + """ + weights_layer_read_name = node.input[1] + if not weights_layer_read_name.endswith("/read"): + raise ValueError( + "Weight layer's name input to conv layer does not end with '/read'") + weights_layer_param_name = weights_layer_read_name[:-5] + weights_node = name_to_order_node[weights_layer_param_name].node + if weights_node.op != "VariableV2": + raise ValueError("Weight layer is not of type VariableV2") + shape = weights_node.attr["shape"] + logging.vlog(4, "weight shape = %s", shape) + kernel_size_y = shape.shape.dim[0].size + kernel_size_x = shape.shape.dim[1].size + return kernel_size_x, kernel_size_y + + +def _padding_size_conv_pool(node, kernel_size, stride): + """Computes padding size given a TF convolution or pooling node. + + Args: + node: Tensorflow node (NodeDef proto). + kernel_size: Kernel size of node (integer). + stride: Stride size of node (integer). + + Returns: + padding: Padding size (integer). + + Raises: + ValueError: If padding is invalid. + """ + # In this case, we need to carefully consider the different TF padding modes. + # The padding depends on kernel size, and may depend on input size. If it + # depends on input size, we raise an exception. + padding_attr = node.attr["padding"] + logging.vlog(4, "padding_attr = %s", padding_attr) + if padding_attr.s == "VALID": + padding = 0 + elif padding_attr.s == "SAME": + if kernel_size == 1: + padding = 0 + elif stride == 1: + padding = int(math.floor((float(kernel_size) - 1) / 2)) + elif stride == 2 and kernel_size % 2 == 0: + padding = int(math.floor((float(kernel_size) - 1) / 2)) + else: + padding = None + logging.warning( + "Padding depends on input size, which means that the effective " + "padding may be different depending on the input image " + "dimensionality. In this case, alignment check will be skipped.") + else: + raise ValueError("Invalid padding operation") + return padding + + +def _pool_kernel_size(node): + """Computes kernel size given a TF pooling node. + + Args: + node: Tensorflow node (NodeDef proto). + + Returns: + kernel_size_x: Kernel size for horizontal direction (integer). + kernel_size_y: Kernel size for vertical direction (integer). + + Raises: + ValueError: If pooling is invalid. + """ + ksize = node.attr["ksize"] + kernel_size_y = ksize.list.i[1] + kernel_size_x = ksize.list.i[2] + if ksize.list.i[0] != 1: + raise ValueError("pool ksize for first dim is not 1") + if ksize.list.i[3] != 1: + raise ValueError("pool ksize for last dim is not 1") + return kernel_size_x, kernel_size_y + + +def _padding_size_pad_layer(node, name_to_order_node): + """Computes padding size given a TF padding node. + + Args: + node: Tensorflow node (NodeDef proto). + name_to_order_node: Map from name to {order, node}. Output of + graph_compute_order.get_compute_order(). + + Returns: + padding_x: Padding size for horizontal direction (integer). + padding_y: Padding size for vertical direction (integer). + + Raises: + ValueError: If padding layer is invalid. + """ + paddings_layer_name = node.input[1] + if not paddings_layer_name.endswith("/paddings"): + raise ValueError("Padding layer name does not end with '/paddings'") + paddings_node = name_to_order_node[paddings_layer_name].node + if paddings_node.op != "Const": + raise ValueError("Padding op is not Const") + value = paddings_node.attr["value"] + t = make_ndarray(value.tensor) + padding_y = t[1][0] + padding_x = t[2][0] + if t[0][0] != 0: + raise ValueError("padding is not zero for first tensor dim") + if t[3][0] != 0: + raise ValueError("padding is not zero for last tensor dim") + return padding_x, padding_y + + +def _get_layer_params(node, name_to_order_node): + """Gets layer parameters relevant for RF computation. + + Currently, only these nodes are supported: + - Conv2D + - DepthwiseConv2dNative + - Pad + - MaxPool + - AvgPool + - all nodes listed in _UNCHANGED_RF_LAYER_OPS + + Args: + node: Tensorflow node (NodeDef proto). + name_to_order_node: Map from name to {order, node}. Output of + graph_compute_order.get_compute_order(). + + Returns: + kernel_size_x: Kernel size for horizontal direction (integer). + kernel_size_y: Kernel size for vertical direction (integer). + stride_x: Stride size for horizontal direction (integer). + stride_y: Stride size for vertical direction (integer). + padding_x: Padding size for horizontal direction (integer). + padding_y: Padding size for vertical direction (integer). + + Raises: + ValueError: If layer op is unknown. + """ + logging.vlog(3, "node.op = %s", node.op) + logging.vlog(4, "node = %s", node) + if node.op == "Conv2D" or node.op == "DepthwiseConv2dNative": + stride_x, stride_y = _stride_size(node) + kernel_size_x, kernel_size_y = _conv_kernel_size(node, name_to_order_node) + # Compute the padding for this node separately for each direction. + padding_x = _padding_size_conv_pool(node, kernel_size_x, stride_x) + padding_y = _padding_size_conv_pool(node, kernel_size_y, stride_y) + elif node.op == "Pad": + # Kernel and stride are simply 1 in this case. + kernel_size_x = 1 + kernel_size_y = 1 + stride_x = 1 + stride_y = 1 + padding_x, padding_y = _padding_size_pad_layer(node, name_to_order_node) + elif node.op == "MaxPool" or node.op == "AvgPool": + stride_x, stride_y = _stride_size(node) + kernel_size_x, kernel_size_y = _pool_kernel_size(node) + # Compute the padding for this node separately for each direction. + padding_x = _padding_size_conv_pool(node, kernel_size_x, stride_x) + padding_y = _padding_size_conv_pool(node, kernel_size_y, stride_y) + elif node.op in _UNCHANGED_RF_LAYER_OPS: + # These nodes do not modify the RF parameters. + kernel_size_x = 1 + kernel_size_y = 1 + stride_x = 1 + stride_y = 1 + padding_x = 0 + padding_y = 0 + else: + raise ValueError("Unknown layer op: %s" % node.op) + return kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y + + +def _reverse_sort_by_order(name_to_order_node): + """Sorts map of name_to_order_node nodes in reverse order. + + The output is such that the nodes in name_to_order_node are sorted in + descending order of the "order" field. + + Args: + name_to_order_node: Map from name to {order, node}. Output of + graph_compute_order.get_compute_order(). + + Returns: + sorted_name_to_order_node: Sorted version of the input, in descending order. + """ + return sorted(name_to_order_node.items(), key=lambda x: -x[1].order) + + +def _get_rf_size_node_input(stride, kernel_size, rf_size_output): + """Computes RF size at the input of a given layer. + + Args: + stride: Stride of given layer (integer). + kernel_size: Kernel size of given layer (integer). + rf_size_output: RF size at output of given layer (integer). + + Returns: + rf_size_input: RF size at input of given layer (integer). + """ + return stride * rf_size_output + kernel_size - stride + + +def _get_effective_stride_node_input(stride, effective_stride_output): + """Computes effective stride at the input of a given layer. + + Args: + stride: Stride of given layer (integer). + effective_stride_output: Effective stride at output of given layer + (integer). + + Returns: + effective_stride_input: Effective stride at input of given layer + (integer). + """ + return stride * effective_stride_output + + +def _get_effective_padding_node_input(stride, padding, + effective_padding_output): + """Computes effective padding at the input of a given layer. + + Args: + stride: Stride of given layer (integer). + padding: Padding of given layer (integer). + effective_padding_output: Effective padding at output of given layer + (integer). + + Returns: + effective_padding_input: Effective padding at input of given layer + (integer). + """ + return stride * effective_padding_output + padding + + +def compute_receptive_field_from_graph_def(graph_def, input_node, output_node): + """Computes receptive field (RF) parameters from a GraphDef object. + + Args: + graph_def: GraphDef object. + input_node: Name of the input node from graph. + output_node: Name of the output node from graph. + + Returns: + rf_size_x: Receptive field size of network in the horizontal direction, with + respect to specified input and output. + rf_size_y: Receptive field size of network in the vertical direction, with + respect to specified input and output. + effective_stride_x: Effective stride of network in the horizontal direction, + with respect to specified input and output. + effective_stride_y: Effective stride of network in the vertical direction, + with respect to specified input and output. + effective_padding_x: Effective padding of network in the horizontal + direction, with respect to specified input and output. + effective_padding_y: Effective padding of network in the vertical + direction, with respect to specified input and output. + + Raises: + ValueError: If network is not aligned or if either input or output nodes + cannot be found. For network criterion alignment, see + photos/vision/features/delf/g3doc/rf_computation.md + """ + # Computes order of computation for a given graph. + name_to_order_node = graph_compute_order.get_compute_order( + graph_def=graph_def) + + # Sort in reverse topological order. + order = _reverse_sort_by_order(name_to_order_node) + + # Dictionaries to keep track of receptive field, effective stride and + # effective padding of different nodes. + rf_sizes_x = {} + rf_sizes_y = {} + effective_strides_x = {} + effective_strides_y = {} + effective_paddings_x = {} + effective_paddings_y = {} + + # Initialize dicts for output_node. + rf_sizes_x[output_node] = 1 + rf_sizes_y[output_node] = 1 + effective_strides_x[output_node] = 1 + effective_strides_y[output_node] = 1 + effective_paddings_x[output_node] = 0 + effective_paddings_y[output_node] = 0 + + # Flag to denote if we found output node yet. If we have not, we skip nodes + # until the output node is found. + found_output_node = False + + # Flag to denote if padding is undefined. This happens when SAME padding mode + # is used in conjunction with stride and kernel sizes which make it such that + # the padding to be applied would depend on the input size. In this case, + # alignment checks are skipped, and the effective padding is None. + undefined_padding = False + + for _, (o, node) in order: + if node: + logging.vlog(3, "%10d %-100s %-20s" % (o, node.name[:90], node.op)) + else: + continue + + # When we find input node, we can stop. + if node.name == input_node: + break + + # Loop until we find the output node. All nodes before finding the output + # one are irrelevant, so they can be skipped. + if not found_output_node: + if node.name == output_node: + found_output_node = True + + if found_output_node: + if node.name not in rf_sizes_x: + assert node.name not in rf_sizes_y, ("Node %s is in rf_sizes_y, but " + "not in rf_sizes_x" % node.name) + # In this case, node is not relevant since it's not part of the + # computation we're interested in. + logging.vlog(3, "Irrelevant node %s, skipping it...", node.name) + continue + + # Get params for this layer. + kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y = ( + _get_layer_params(node, name_to_order_node)) + logging.vlog(3, "kernel_size_x = %s, kernel_size_y = %s, " + "stride_x = %s, stride_y = %s, " + "padding_x = %s, padding_y = %s" % + (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, + padding_y)) + if padding_x is None or padding_y is None: + undefined_padding = True + + # Get parameters at input of this layer which may or may not be propagated + # to the input layers. + rf_size_input_x = _get_rf_size_node_input(stride_x, kernel_size_x, + rf_sizes_x[node.name]) + rf_size_input_y = _get_rf_size_node_input(stride_y, kernel_size_y, + rf_sizes_y[node.name]) + effective_stride_input_x = _get_effective_stride_node_input( + stride_x, effective_strides_x[node.name]) + effective_stride_input_y = _get_effective_stride_node_input( + stride_y, effective_strides_y[node.name]) + if not undefined_padding: + effective_padding_input_x = _get_effective_padding_node_input( + stride_x, padding_x, effective_paddings_x[node.name]) + effective_padding_input_y = _get_effective_padding_node_input( + stride_y, padding_y, effective_paddings_y[node.name]) + else: + effective_padding_input_x = None + effective_padding_input_y = None + + # Loop over this node's inputs and potentially propagate information down. + for inp_name in node.input: + logging.vlog(4, "inp_name = %s", inp_name) + inp_node = name_to_order_node[inp_name].node + logging.vlog(4, "inp_node = \n%s", inp_node) + if inp_node.name in rf_sizes_x: + assert inp_node.name in rf_sizes_y, ( + "Node %s is in rf_sizes_x, but " + "not in rf_sizes_y" % inp_node.name) + # This node was already discovered through a previous path, so we need + # to make sure that graph is aligned. This alignment check is skipped + # if the padding is not defined, since in this case alignment cannot + # be checked. + if not undefined_padding: + if effective_strides_x[inp_node.name] != effective_stride_input_x: + raise ValueError( + "Graph is not aligned since effective stride from different " + "paths is different in horizontal direction") + if effective_strides_y[inp_node.name] != effective_stride_input_y: + raise ValueError( + "Graph is not aligned since effective stride from different " + "paths is different in vertical direction") + if (rf_sizes_x[inp_node.name] - 1 + ) / 2 - effective_paddings_x[inp_node.name] != ( + rf_size_input_x - 1) / 2 - effective_padding_input_x: + raise ValueError( + "Graph is not aligned since center shift from different " + "paths is different in horizontal direction") + if (rf_sizes_y[inp_node.name] - 1 + ) / 2 - effective_paddings_y[inp_node.name] != ( + rf_size_input_y - 1) / 2 - effective_padding_input_y: + raise ValueError( + "Graph is not aligned since center shift from different " + "paths is different in vertical direction") + # Keep track of path with largest RF, for both directions. + if rf_sizes_x[inp_node.name] < rf_size_input_x: + rf_sizes_x[inp_node.name] = rf_size_input_x + effective_strides_x[inp_node.name] = effective_stride_input_x + effective_paddings_x[inp_node.name] = effective_padding_input_x + if rf_sizes_y[inp_node.name] < rf_size_input_y: + rf_sizes_y[inp_node.name] = rf_size_input_y + effective_strides_y[inp_node.name] = effective_stride_input_y + effective_paddings_y[inp_node.name] = effective_padding_input_y + else: + assert inp_node.name not in rf_sizes_y, ( + "Node %s is in rf_sizes_y, but " + "not in rf_sizes_x" % inp_node.name) + # In this case, it is the first time we encounter this node. So we + # propagate the RF parameters. + rf_sizes_x[inp_node.name] = rf_size_input_x + rf_sizes_y[inp_node.name] = rf_size_input_y + effective_strides_x[inp_node.name] = effective_stride_input_x + effective_strides_y[inp_node.name] = effective_stride_input_y + effective_paddings_x[inp_node.name] = effective_padding_input_x + effective_paddings_y[inp_node.name] = effective_padding_input_y + + if not found_output_node: + raise ValueError("Output node was not found") + if input_node not in rf_sizes_x: + raise ValueError("Input node was not found") + return (rf_sizes_x[input_node], rf_sizes_y[input_node], + effective_strides_x[input_node], effective_strides_y[input_node], + effective_paddings_x[input_node], effective_paddings_y[input_node]) diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py new file mode 100644 index 0000000000..44e5beda60 --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py @@ -0,0 +1,221 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for receptive_fields module.""" + +from tensorflow.contrib import slim +from tensorflow.contrib.receptive_field.python.util import receptive_field +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import nn +from tensorflow.python.platform import test + + +def create_test_network_1(): + """Aligned network for test. + + The graph corresponds to the example from the second figure in + go/cnn-rf-computation#arbitrary-computation-graphs + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. + x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Left branch. + l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID') + # Right branch. + l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]]) + l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID') + l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='VALID') + # Addition. + nn.relu(l1 + l3, name='output') + return g + + +def create_test_network_2(): + """Aligned network for test. + + The graph corresponds to a variation to the example from the second figure in + go/cnn-rf-computation#arbitrary-computation-graphs. Layers 2 and 3 are changed + to max-pooling operations. Since the functionality is the same as convolution, + the network is aligned and the receptive field size is the same as from the + network created using create_test_network_1(). + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. + x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Left branch. + l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID') + # Right branch. + l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]]) + l2 = slim.max_pool2d(l2_pad, [3, 3], stride=2, scope='L2', padding='VALID') + l3 = slim.max_pool2d(l2, [1, 1], stride=2, scope='L3', padding='VALID') + # Addition. + nn.relu(l1 + l3, name='output') + return g + + +def create_test_network_3(): + """Misaligned network for test. + + The graph corresponds to the example from the first figure in + go/cnn-rf-computation#arbitrary-computation-graphs + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. + x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Left branch. + l1_pad = array_ops.pad(x, [[0, 0], [2, 1], [2, 1], [0, 0]]) + l1 = slim.conv2d(l1_pad, 1, [5, 5], stride=2, scope='L1', padding='VALID') + # Right branch. + l2 = slim.conv2d(x, 1, [3, 3], stride=1, scope='L2', padding='VALID') + l3 = slim.conv2d(l2, 1, [3, 3], stride=1, scope='L3', padding='VALID') + # Addition. + nn.relu(l1 + l3, name='output') + return g + + +def create_test_network_4(): + """Misaligned network for test. + + The graph corresponds to a variation from the example from the second figure + in go/cnn-rf-computation#arbitrary-computation-graphs. Layer 2 uses 'SAME' + padding, which makes its padding dependent on the input image dimensionality. + In this case, the effective padding will be undetermined, and the utility is + not able to check the network alignment. + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. + x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Left branch. + l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID') + # Right branch. + l2 = slim.conv2d(x, 1, [3, 3], stride=2, scope='L2', padding='SAME') + l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='VALID') + # Addition. + nn.relu(l1 + l3, name='output') + return g + + +def create_test_network_5(): + """Single-path network for testing non-square kernels. + + The graph is similar to the right branch of the graph from + create_test_network_1(), except that the kernel sizes are changed to be + non-square. + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. + x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Two convolutional layers, where the first one has non-square kernel. + l1 = slim.conv2d(x, 1, [3, 5], stride=2, scope='L1', padding='VALID') + l2 = slim.conv2d(l1, 1, [3, 1], stride=2, scope='L2', padding='VALID') + # ReLU. + nn.relu(l2, name='output') + return g + + +class RfUtilsTest(test.TestCase): + + def testComputeRFFromGraphDefAligned(self): + graph_def = create_test_network_1().as_graph_def() + input_node = 'input_image' + output_node = 'output' + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y) = ( + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node)) + self.assertEqual(receptive_field_x, 3) + self.assertEqual(receptive_field_y, 3) + self.assertEqual(effective_stride_x, 4) + self.assertEqual(effective_stride_y, 4) + self.assertEqual(effective_padding_x, 1) + self.assertEqual(effective_padding_y, 1) + + def testComputeRFFromGraphDefAligned2(self): + graph_def = create_test_network_2().as_graph_def() + input_node = 'input_image' + output_node = 'output' + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y) = ( + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node)) + self.assertEqual(receptive_field_x, 3) + self.assertEqual(receptive_field_y, 3) + self.assertEqual(effective_stride_x, 4) + self.assertEqual(effective_stride_y, 4) + self.assertEqual(effective_padding_x, 1) + self.assertEqual(effective_padding_y, 1) + + def testComputeRFFromGraphDefUnaligned(self): + graph_def = create_test_network_3().as_graph_def() + input_node = 'input_image' + output_node = 'output' + with self.assertRaises(ValueError): + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node) + + def testComputeRFFromGraphDefUnaligned2(self): + graph_def = create_test_network_4().as_graph_def() + input_node = 'input_image' + output_node = 'output' + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y) = ( + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node)) + self.assertEqual(receptive_field_x, 3) + self.assertEqual(receptive_field_y, 3) + self.assertEqual(effective_stride_x, 4) + self.assertEqual(effective_stride_y, 4) + self.assertEqual(effective_padding_x, None) + self.assertEqual(effective_padding_y, None) + + def testComputeRFFromGraphDefNonSquareRF(self): + graph_def = create_test_network_5().as_graph_def() + input_node = 'input_image' + output_node = 'output' + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y) = ( + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node)) + self.assertEqual(receptive_field_x, 5) + self.assertEqual(receptive_field_y, 7) + self.assertEqual(effective_stride_x, 4) + self.assertEqual(effective_stride_y, 4) + self.assertEqual(effective_padding_x, 0) + self.assertEqual(effective_padding_y, 0) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index ae93cf210b..bb6334942f 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -157,6 +157,7 @@ sh_binary( "//tensorflow/contrib/ndlstm:ndlstm", "//tensorflow/contrib/nn:nn_py", "//tensorflow/contrib/predictor:predictor_pip", + "//tensorflow/contrib/receptive_field:receptive_field_pip", "//tensorflow/contrib/session_bundle:session_bundle_pip", "//tensorflow/contrib/signal:signal_py", "//tensorflow/contrib/slim:slim", -- GitLab From 9b7e05a6f643ba41fae86e2081c31634262146e2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 31 Aug 2017 11:00:19 -0700 Subject: [PATCH 147/294] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 167160397 --- tensorflow/go/op/wrappers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 0781347fd6..9583ffb38b 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -20306,7 +20306,7 @@ func Prod(scope *Scope, input tf.Output, reduction_indices tf.Output, optional . // gradients: The backpropagated gradients to the corresponding softsign operation. // features: The features passed as input to the corresponding softsign operation. // -// Returns The gradients: `gradients / (1 + abs(-features)) ** 2`. +// Returns The gradients: `gradients / (1 + abs(features)) ** 2`. func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { if scope.Err() != nil { return -- GitLab From cf3e3d19ed7e4b2d74f5fddc5785acba7df5b818 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 31 Aug 2017 11:28:21 -0700 Subject: [PATCH 148/294] Change node name index in InstantiateFunction from an unordered map to an ordered map. Use ordering to improve control edge instantiation algorithm from O(num control edges * num nodes) algorithm to ~O(num control edges * log(num nodes)). PiperOrigin-RevId: 167164967 --- tensorflow/core/framework/function.cc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc index c2d3f37ab3..b788d6b777 100644 --- a/tensorflow/core/framework/function.cc +++ b/tensorflow/core/framework/function.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/function.h" +#include #include #include #include @@ -271,12 +272,17 @@ class FunctionInstantiationHelper { int nid = -1; const string node_name = input.substr(1); const string node_colon = node_name + ":"; - for (const auto& p : index_) { - if (p.first == node_name || - tensorflow::StringPiece(p.first).starts_with(node_colon)) { - nid = p.second.nid; + const string node_colon_bound = node_name + ";"; + // index_ is a map sorted lexicographically, so the key we are looking for + // must lie in the range [node_name, node_colon_bound). + auto it = index_.lower_bound(node_name); + while (it != index_.end() && it->first <= node_colon_bound) { + if (it->first == node_name || + tensorflow::StringPiece(it->first).starts_with(node_colon)) { + nid = it->second.nid; break; } + ++it; } if (nid == -1) { return errors::InvalidArgument("input[", i, "] == '", input, @@ -421,7 +427,7 @@ class FunctionInstantiationHelper { GetFunctionSignature get_function_; InstantiationResult& result_; // A small index for all names that can be used as a node's input arguments. - std::unordered_map index_; + std::map index_; // This contains information about a node in the new graph including the node // names and input nodes' indexes. struct NodeInfo { -- GitLab From a6510237f06f03babfbcce1bcfa77f07f0d03c50 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 31 Aug 2017 14:14:40 -0500 Subject: [PATCH 149/294] Add eager_pip to simple_console_for_windows.zip (#12680) * Add eager_pip to simple_console_for_windows.zip * Windows: re-enable tests in //tensorflow/python/eager --- tensorflow/python/eager/BUILD | 5 ----- tensorflow/tools/pip_package/BUILD | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 3f56c4f0e1..4bb810a6c8 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -121,7 +121,6 @@ cuda_py_test( "//tensorflow/python:resource_variable_ops", "//tensorflow/python:training", ], - tags = ["no_windows"], ) cuda_py_test( @@ -153,7 +152,6 @@ cuda_py_test( "//tensorflow/python:clip_ops", "//tensorflow/python:math_ops", ], - tags = ["no_windows"], ) py_library( @@ -356,7 +354,6 @@ py_test( name = "benchmarks_test", srcs = ["benchmarks_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":backprop", ":context", @@ -374,7 +371,6 @@ py_test( name = "tape_test", srcs = ["tape_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":backprop", ":context", @@ -395,7 +391,6 @@ py_test( name = "tensor_node_test", srcs = ["tensor_node_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":tensor", ":tensor_node", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index ae93cf210b..34cb19f9cb 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -84,6 +84,7 @@ py_binary( "//tensorflow/python/saved_model", "//tensorflow/python:spectral_ops_test_util", "//tensorflow/python/tools:tools_pip", + "//tensorflow/python/eager:eager_pip", # These targets don't build on Windows yet. Exclude them for now. # "//tensorflow/contrib/ndlstm", # "//tensorflow/contrib/slim", -- GitLab From a674130dbe377294808f17a09a380ca1b6fd5f49 Mon Sep 17 00:00:00 2001 From: Fritz Obermeyer Date: Thu, 31 Aug 2017 13:54:40 -0700 Subject: [PATCH 150/294] Expose C API symbols on OS X --- tensorflow/c/version_script.lds | 2 +- tensorflow/tf_exported_symbols.lds | 2 +- tensorflow/tf_version_script.lds | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/version_script.lds b/tensorflow/c/version_script.lds index 455bd7362b..9bdc6dcc2e 100644 --- a/tensorflow/c/version_script.lds +++ b/tensorflow/c/version_script.lds @@ -1,7 +1,7 @@ VERS_1.0 { # Export symbols in c_api.h. global: - TF_*; + *TF_*; # Hide everything else. local: diff --git a/tensorflow/tf_exported_symbols.lds b/tensorflow/tf_exported_symbols.lds index 4597d929a1..af5341a8d6 100644 --- a/tensorflow/tf_exported_symbols.lds +++ b/tensorflow/tf_exported_symbols.lds @@ -1,5 +1,5 @@ *tensorflow* *perftools*gputools* *tf_* -TF_* +*TF_* *nsync_* diff --git a/tensorflow/tf_version_script.lds b/tensorflow/tf_version_script.lds index 88b64eb1f0..aeb7d66b32 100644 --- a/tensorflow/tf_version_script.lds +++ b/tensorflow/tf_version_script.lds @@ -2,7 +2,7 @@ tensorflow { global: *tensorflow*; *perftools*gputools*; - TF_*; + *TF_*; *nsync_*; local: *; -- GitLab From 6cdc01c1dcfe9300dc5b1ecede781ce65e7fc005 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 31 Aug 2017 14:09:26 -0700 Subject: [PATCH 151/294] Automatically fill in num_classes, growing_mode, pruning_mode, learning_rate and multi_class_strategy if not specified. PiperOrigin-RevId: 167188663 --- .../estimator_batch/estimator.py | 12 +++++++ .../boosted_trees/estimator_batch/model.py | 1 + .../bias-feature-column-handler_test.cc | 1 + ...categorical-feature-column-handler_test.cc | 2 +- ...e-quantized-feature-column-handler_test.cc | 2 +- ...e-quantized-feature-column-handler_test.cc | 2 +- .../stochastic/stats/node-stats_test.cc | 7 +++++ .../contrib/boosted_trees/proto/learner.proto | 30 ++++++++++-------- .../kernel_tests/prediction_ops_test.py | 1 + .../python/training/functions/gbdt_batch.py | 31 ++++++++++++++++++- .../training/functions/gbdt_batch_test.py | 18 +++++------ 11 files changed, 81 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py index e28adad53e..f8028acbdb 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py @@ -61,11 +61,19 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): logits_modifier_function: A modifier function for the logits. center_bias: Whether a separate tree should be created for first fitting the bias. + + Raises: + ValueError: If learner_config is not valid. """ head = head_lib.multi_class_head( n_classes=n_classes, weight_column_name=weight_column_name, enable_centered_bias=False) + if learner_config.num_classes == 0: + learner_config.num_classes = n_classes + elif learner_config.num_classes != n_classes: + raise ValueError("n_classes (%d) doesn't match learner_config (%d)." % + (learner_config.num_classes, n_classes)) super(GradientBoostedDecisionTreeClassifier, self).__init__( model_fn=model.model_builder, params={ @@ -129,6 +137,10 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): label_dimension=label_dimension, weight_column_name=weight_column_name, enable_centered_bias=False) + if label_dimension == 1: + learner_config.num_classes = 2 + else: + learner_config.num_classes = label_dimension super(GradientBoostedDecisionTreeRegressor, self).__init__( model_fn=model.model_builder, params={ diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/model.py b/tensorflow/contrib/boosted_trees/estimator_batch/model.py index 2d517f7811..8cda5c8f2b 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/model.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/model.py @@ -92,6 +92,7 @@ def model_builder(features, labels, mode, params, config): examples_per_layer=examples_per_layer, learner_config=learner_config, feature_columns=feature_columns, + logits_dimension=head.logits_dimension, features=features) with ops.name_scope("gbdt", "gbdt_optimizer"): predictions_dict = gbdt_model.predict(mode) diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc index 82664aed72..f4c7df7fab 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc @@ -42,6 +42,7 @@ class BiasFeatureColumnHandlerTest : public ::testing::Test { example_partitions_({0, 0, 1, 3}) { // Set L2 regularization. learner_config_.mutable_regularization()->set_l2(2.0f); + learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); // Create handler. handler_.reset(new BiasFeatureColumnHandler(kClassId, kSlotId, kBatchSize)); diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc index abd7238464..ea82b3f086 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc @@ -51,7 +51,7 @@ class CategoricalFeatureColumnHandlerTest : public ::testing::Test { values_(test::AsTensor({1, 2, 2, 0}, {4})) { // Set L2 regularization. learner_config_.mutable_regularization()->set_l2(2.0f); - + learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); // Create handler. handler_.reset(new CategoricalFeatureColumnHandler( kClassId, kSlotId, kBatchSize, kFeatureColumn, indices_.matrix(), diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc index 396f48e532..1bc9d733ad 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc @@ -51,7 +51,7 @@ class DenseQuantizedFeatureColumnHandlerTest : public ::testing::Test { dense_quantized_values_(test::AsTensor({1, 1, 0, 1}, {4})) { // Set L2 regularization. learner_config_.mutable_regularization()->set_l2(2.0f); - + learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); // Create handler. handler_.reset(new DenseQuantizedFeatureColumnHandler( kClassId, kSlotId, kBatchSize, kFeatureColumn, diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc index db8c64a617..643d936ad2 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc @@ -53,7 +53,7 @@ class SparseQuantizedFeatureColumnHandlerTest : public ::testing::Test { sparse_quantized_values_(test::AsTensor({1, 0, 1}, {3})) { // Set L2 regularization. learner_config_.mutable_regularization()->set_l2(2.0f); - + learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); // Create handler. handler_.reset(new SparseQuantizedFeatureColumnHandler( kClassId, kSlotId, kBatchSize, kFeatureColumn, diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc index f99b6826a7..ecb7a04efb 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc @@ -30,6 +30,7 @@ const double kDelta = 1e-5; TEST(NodeStatsTest, AlmostZero) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); NodeStats node_stats(learner_config, GradientStats(1e-8f, 1e-8f)); EXPECT_EQ(0, node_stats.weight_contribution[0]); EXPECT_EQ(0, node_stats.gain); @@ -37,6 +38,7 @@ TEST(NodeStatsTest, AlmostZero) { TEST(NodeStatsTest, LessThanMinWeightConstraint) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_constraints()->set_min_node_weight(3.2f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); EXPECT_EQ(0, node_stats.weight_contribution[0]); @@ -45,6 +47,7 @@ TEST(NodeStatsTest, LessThanMinWeightConstraint) { TEST(NodeStatsTest, L1RegSquashed) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l1(10.0f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); EXPECT_EQ(0, node_stats.weight_contribution[0]); @@ -53,6 +56,7 @@ TEST(NodeStatsTest, L1RegSquashed) { TEST(NodeStatsTest, L1RegPos) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l1(5.0f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); const float expected_clipped_grad = 7.32f - 5.0f; @@ -66,6 +70,7 @@ TEST(NodeStatsTest, L1RegPos) { TEST(NodeStatsTest, L1RegNeg) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l1(5.0f); NodeStats node_stats(learner_config, GradientStats(-7.32f, 1.63f)); const float expected_clipped_grad = -7.32f + 5.0f; @@ -79,6 +84,7 @@ TEST(NodeStatsTest, L1RegNeg) { TEST(NodeStatsTest, L2Reg) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l2(8.0f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); const float expected_denom = 1.63f + 8.0f; @@ -91,6 +97,7 @@ TEST(NodeStatsTest, L2Reg) { TEST(NodeStatsTest, L1L2Reg) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l1(5.0f); learner_config.mutable_regularization()->set_l2(8.0f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); diff --git a/tensorflow/contrib/boosted_trees/proto/learner.proto b/tensorflow/contrib/boosted_trees/proto/learner.proto index 06ee223467..919e7cd814 100644 --- a/tensorflow/contrib/boosted_trees/proto/learner.proto +++ b/tensorflow/contrib/boosted_trees/proto/learner.proto @@ -17,7 +17,7 @@ message TreeRegularizationConfig { // Tree constraints config. message TreeConstraintsConfig { - // Maximum depth of the trees. + // Maximum depth of the trees. The default value is 6 if not specified. uint32 max_tree_depth = 1; // Min hessian weight per node. @@ -86,20 +86,22 @@ message LearningRateDropoutDrivenConfig { message LearnerConfig { enum PruningMode { - PRE_PRUNE = 0; - POST_PRUNE = 1; + PRUNING_MODE_UNSPECIFIED = 0; + PRE_PRUNE = 1; + POST_PRUNE = 2; } enum GrowingMode { - WHOLE_TREE = 0; - // Layer by layer is only supported by the batch learner. - LAYER_BY_LAYER = 1; + GROWING_MODE_UNSPECIFIED = 0; + WHOLE_TREE = 1; + LAYER_BY_LAYER = 2; } enum MultiClassStrategy { - TREE_PER_CLASS = 0; - FULL_HESSIAN = 1; - DIAGONAL_HESSIAN = 2; + MULTI_CLASS_STRATEGY_UNSPECIFIED = 0; + TREE_PER_CLASS = 1; + FULL_HESSIAN = 2; + DIAGONAL_HESSIAN = 3; } // Number of classes. @@ -118,16 +120,18 @@ message LearnerConfig { // Constraints. TreeConstraintsConfig constraints = 5; - // Pruning. + // Pruning. POST_PRUNE is the default pruning mode. PruningMode pruning_mode = 8; - // Growing Mode. + // Growing Mode. LAYER_BY_LAYER is the default growing mode. GrowingMode growing_mode = 9; - // Learning rate. + // Learning rate. By default we use fixed learning rate of 0.1. LearningRateConfig learning_rate_tuner = 6; - // Multi-class strategy. + // Multi-class strategy. By default we use TREE_PER_CLASS for binary + // classification and linear regression. For other cases, we use + // DIAGONAL_HESSIAN as the default. MultiClassStrategy multi_class_strategy = 10; // If you want to average the ensembles (for regularization), provide the diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py index 51e084b79c..37595f1c75 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py @@ -344,6 +344,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): # Prepare learner config. learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 + learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE result, result_no_dropout, dropout_info = ( prediction_ops.gradient_trees_prediction( diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index 6f85874a33..83c88a0442 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -261,6 +261,7 @@ class GradientBoostedDecisionTreeModel(object): examples_per_layer, learner_config, features, + logits_dimension, feature_columns=None): """Construct a new GradientBoostedDecisionTreeModel function. @@ -273,8 +274,8 @@ class GradientBoostedDecisionTreeModel(object): a tree layer. It can also be a function that computes the number of examples based on the depth of the layer that's being built. learner_config: A learner config. - print split, sorted_feature_names[split.feature_column] features: `dict` of `Tensor` objects. + logits_dimension: An int, the dimension of logits. feature_columns: A list of feature columns. Raises: @@ -289,11 +290,39 @@ class GradientBoostedDecisionTreeModel(object): if learner_config.num_classes < 2: raise ValueError("Number of classes must be >=2") + self._logits_dimension = logits_dimension self._is_chief = is_chief self._num_ps_replicas = num_ps_replicas self._ensemble_handle = ensemble_handle self._center_bias = center_bias self._examples_per_layer = examples_per_layer + + # Fill in the defaults. + if (learner_config.multi_class_strategy == + learner_pb2.LearnerConfig.MULTI_CLASS_STRATEGY_UNSPECIFIED): + if logits_dimension == 1: + learner_config.multi_class_strategy = ( + learner_pb2.LearnerConfig.TREE_PER_CLASS) + else: + learner_config.multi_class_strategy = ( + learner_pb2.LearnerConfig.DIAGONAL_HESSIAN) + + if (learner_config.growing_mode == + learner_pb2.LearnerConfig.GROWING_MODE_UNSPECIFIED): + learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER + + if (learner_config.pruning_mode == + learner_pb2.LearnerConfig.PRUNING_MODE_UNSPECIFIED): + learner_config.pruning_mode = learner_pb2.LearnerConfig.POST_PRUNE + + if learner_config.constraints.max_tree_depth == 0: + # Use 6 as the default maximum depth. + learner_config.constraints.max_tree_depth = 6 + + tuner = learner_config.learning_rate_tuner.WhichOneof("tuner") + if not tuner: + learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 + self._learner_config = learner_config self._feature_columns = feature_columns self._learner_config_serialized = learner_config.SerializeToString() diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index 9ce434edf8..16e24d97dd 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -164,7 +164,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -268,7 +268,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=num_examples_fn, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -371,7 +371,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -442,7 +442,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -505,7 +505,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -588,7 +588,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) # Create predict op. mode = model_fn.ModeKeys.EVAL @@ -627,7 +627,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=5, features=features) predictions = array_ops.constant( [[0.0, -1.0, 0.5, 1.2, 3.1], [1.0, 0.0, 0.8, 0.3, 1.0], @@ -730,7 +730,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=5, features=features) predictions = array_ops.constant( [[0.0, -1.0, 0.5, 1.2, 3.1], [1.0, 0.0, 0.8, 0.3, 1.0], @@ -833,7 +833,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=5, features=features) batch_size = 3 predictions = array_ops.constant( -- GitLab From 569af010a7faf0744fd366648a8c4b3bf18e35c3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 31 Aug 2017 14:45:18 -0700 Subject: [PATCH 152/294] Fixing small issues introduced with tf.contrib.receptive_field PiperOrigin-RevId: 167194226 --- tensorflow/contrib/receptive_field/BUILD | 3 +++ .../receptive_field/python/util/examples/compute_rf.py | 4 ++++ .../python/util/examples/write_inception_resnet_v2_graph.py | 4 ++++ .../receptive_field/python/util/receptive_field_test.py | 4 ++++ 4 files changed, 15 insertions(+) diff --git a/tensorflow/contrib/receptive_field/BUILD b/tensorflow/contrib/receptive_field/BUILD index ed2f3af08c..e2d7c07510 100644 --- a/tensorflow/contrib/receptive_field/BUILD +++ b/tensorflow/contrib/receptive_field/BUILD @@ -47,6 +47,9 @@ py_test( name = "receptive_field_test", srcs = ["python/util/receptive_field_test.py"], srcs_version = "PY2AND3", + tags = [ + "no_oss", # see b/65254194 + ], deps = [ ":receptive_field_py", "//tensorflow/contrib/framework:framework_py", diff --git a/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py index 70a0d11dff..1cf978b90a 100644 --- a/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py +++ b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py @@ -17,6 +17,10 @@ For an example of usage, see accompanying file compute_rf.sh """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import argparse import sys diff --git a/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py b/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py index a2384f66ce..793ae163d8 100644 --- a/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py +++ b/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py @@ -15,6 +15,10 @@ """Simple script to write Inception-ResNet-v2 model to graph file. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import argparse import sys diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py index 44e5beda60..2771389250 100644 --- a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py +++ b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py @@ -14,6 +14,10 @@ # ============================================================================== """Tests for receptive_fields module.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + from tensorflow.contrib import slim from tensorflow.contrib.receptive_field.python.util import receptive_field from tensorflow.python.framework import dtypes -- GitLab From 91617d22fc5868948a361e04a0642a765a092544 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 31 Aug 2017 14:45:25 -0700 Subject: [PATCH 153/294] [XLA] Dump nested fusion nodes without crashing PiperOrigin-RevId: 167194247 --- .../compiler/xla/service/hlo_graph_dumper.cc | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index dfb111d1d0..07b3369d5c 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -561,13 +561,21 @@ tooltip = " "; } string comp_body = DumpComputation(subcomp); - string computation = - Printf(computation_fmt, id, style, subcomp_label, comp_body, id); - // Add an edge from the subcomputation to its parent node. If subcomp - // belongs to a fusion node, it's drawn in place of the fusion instruction, so - // there's no need to link those. - if (parent_instr->opcode() != HloOpcode::kFusion) { + if (parent_instr->opcode() == HloOpcode::kFusion) { + // Dump any nested fusion nodes. + for (const auto& subcomp_instr : subcomp->instructions()) { + if (subcomp_instr->opcode() == HloOpcode::kFusion) { + StrAppend( + &comp_body, + DumpSubcomputation(subcomp_instr->fused_instructions_computation(), + subcomp_instr.get())); + } + } + } else { + // Add an edge from the subcomputation to its parent node. If subcomp + // belongs to a fusion node, it's drawn in place of the fusion instruction, + // so there's no need to link those. edge_ids_.insert( {{subcomp->root_instruction(), parent_instr}, next_edge_id_++}); const char* edge_fmt = @@ -578,6 +586,9 @@ tooltip = " "; subcomp->name(), parent_instr->name())); } + string computation = + Printf(computation_fmt, id, style, subcomp_label, comp_body, id); + return computation; } -- GitLab From 19680e68f33bd5918594a931855e751cc053ad8f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 31 Aug 2017 15:00:04 -0700 Subject: [PATCH 154/294] Initial submit for TFGAN. Code will be migrated soon. PiperOrigin-RevId: 167196427 --- tensorflow/BUILD | 1 + tensorflow/contrib/BUILD | 1 + tensorflow/contrib/__init__.py | 1 + tensorflow/contrib/gan/BUILD | 27 +++++++++++++++++++++++++++ tensorflow/contrib/gan/README.md | 4 ++++ tensorflow/contrib/gan/__init__.py | 19 +++++++++++++++++++ 6 files changed, 53 insertions(+) create mode 100644 tensorflow/contrib/gan/BUILD create mode 100644 tensorflow/contrib/gan/README.md create mode 100644 tensorflow/contrib/gan/__init__.py diff --git a/tensorflow/BUILD b/tensorflow/BUILD index d14c593ade..9faa0964ca 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -288,6 +288,7 @@ filegroup( "//tensorflow/contrib/ffmpeg/default:all_files", "//tensorflow/contrib/framework:all_files", "//tensorflow/contrib/fused_conv:all_files", + "//tensorflow/contrib/gan:all_files", "//tensorflow/contrib/graph_editor:all_files", "//tensorflow/contrib/grid_rnn:all_files", "//tensorflow/contrib/hooks:all_files", diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 47a0f54a02..d9f2b84346 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -28,6 +28,7 @@ py_library( "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/framework:framework_py", "//tensorflow/contrib/fused_conv:fused_conv_py", + "//tensorflow/contrib/gan", "//tensorflow/contrib/graph_editor:graph_editor_py", "//tensorflow/contrib/grid_rnn:grid_rnn_py", "//tensorflow/contrib/hooks", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 315ea943cf..d1d0e2823a 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -31,6 +31,7 @@ from tensorflow.contrib import deprecated from tensorflow.contrib import distributions from tensorflow.contrib import factorization from tensorflow.contrib import framework +from tensorflow.contrib import gan from tensorflow.contrib import graph_editor from tensorflow.contrib import grid_rnn from tensorflow.contrib import image diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD new file mode 100644 index 0000000000..b2de282356 --- /dev/null +++ b/tensorflow/contrib/gan/BUILD @@ -0,0 +1,27 @@ +package(default_visibility = ["//tensorflow:__subpackages__"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +py_library( + name = "gan", + srcs = [ + "__init__.py", + ], + srcs_version = "PY2AND3", + deps = [ + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/gan/README.md b/tensorflow/contrib/gan/README.md new file mode 100644 index 0000000000..586e5ac331 --- /dev/null +++ b/tensorflow/contrib/gan/README.md @@ -0,0 +1,4 @@ +This directory contains the TFGAN project. + +This file will have more details as code is added. + diff --git a/tensorflow/contrib/gan/__init__.py b/tensorflow/contrib/gan/__init__.py new file mode 100644 index 0000000000..a46b0e8d5d --- /dev/null +++ b/tensorflow/contrib/gan/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TFGAN grouped API.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function -- GitLab From 842f3b2c6a1305075529590244e25d4b28eda029 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 31 Aug 2017 15:12:00 -0700 Subject: [PATCH 155/294] Remove unused BUILD dependencies PiperOrigin-RevId: 167198355 --- tensorflow/contrib/image/BUILD | 5 +---- tensorflow/core/BUILD | 1 - tensorflow/core/kernels/BUILD | 10 ++-------- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index e631c243c3..a27bec4801 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -121,12 +121,9 @@ tf_gen_op_wrapper_py( cc_library( name = "image_ops_cc", - srcs = [ - "ops/image_ops.cc", - ], + srcs = ["ops/image_ops.cc"], deps = [ ":image_ops_kernels", - "//tensorflow/core", "//tensorflow/core:framework", ], alwayslink = 1, diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index de9eb057e4..cd083abb50 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3079,7 +3079,6 @@ cc_test( srcs = ["example/example_parser_configuration_test.cc"], data = [":example_parser_configuration_testdata"], deps = [ - ":core", ":core_cpu", ":core_cpu_internal", ":direct_session_internal", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 0893a01204..6530ecf13f 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -259,19 +259,13 @@ cc_library( cc_library( name = "conv_ops_gpu_hdrs", hdrs = ["conv_ops_gpu.h"], - deps = [ - ":eigen_helpers", - "//third_party/eigen3", - ], + deps = ["//third_party/eigen3"], ) cc_library( name = "gpu_util_hdrs", hdrs = ["gpu_utils.h"], - deps = [ - ":eigen_helpers", - "//third_party/eigen3", - ], + deps = ["//third_party/eigen3"], ) tf_cc_test( -- GitLab From afe327405ea879ee888071660ec78ad0df35534f Mon Sep 17 00:00:00 2001 From: Jeff Carpenter Date: Thu, 31 Aug 2017 19:20:22 -0700 Subject: [PATCH 156/294] Fix step numbering in Linux install docs --- tensorflow/docs_src/install/install_linux.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 43e09906f7..d5e481520c 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -151,10 +151,10 @@ Take the following steps to install TensorFlow with Virtualenv: (tensorflow)$ pip install --upgrade tensorflow-gpu # for Python 2.7 and GPU (tensorflow)$ pip3 install --upgrade tensorflow-gpu # for Python 3.n and GPU

- If the preceding command succeeds, skip Step 5. If the preceding - command fails, perform Step 5. + If the preceding command succeeds, skip Step 6. If the preceding + command fails, perform Step 6. - 5. (Optional) If Step 4 failed (typically because you invoked a pip version + 6. (Optional) If Step 5 failed (typically because you invoked a pip version lower than 8.1), install TensorFlow in the active virtualenv environment by issuing a command of the following format: -- GitLab From 6e8d0c632dea30758c7cc343decdf8ab7956e59d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 07:40:30 -0700 Subject: [PATCH 157/294] Improve the error messaging in the case of label dimension mismatch. PiperOrigin-RevId: 167273846 --- tensorflow/python/estimator/canned/head.py | 7 +++++-- tensorflow/python/estimator/canned/head_test.py | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index d2c5772483..80d109d927 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -200,8 +200,11 @@ def _check_labels(labels, expected_labels_dimension): dim1 = static_shape[1] if (dim1 is not None) and (dim1 != expected_labels_dimension): raise ValueError( - 'labels shape must be [batch_size, labels_dimension], got %s.' % - (static_shape,)) + 'Mismatched label shape. ' + 'Classifier configured with n_classes=%s. Received %s. ' + 'Suggested Fix: check your n_classes argument to the estimator ' + 'and/or the shape of your label.' % + (expected_labels_dimension, dim1)) assert_dimension = check_ops.assert_equal( expected_labels_dimension, labels_shape[1], message=err_msg) with ops.control_dependencies([assert_dimension]): diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index 23678013c6..fa3d5b44eb 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -139,7 +139,7 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): features = {'x': np.array(((42.,),))} # Static shape. - with self.assertRaisesRegexp(ValueError, 'labels shape'): + with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features=features, mode=model_fn.ModeKeys.EVAL, @@ -889,7 +889,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): logits_2x1 = np.array(((45.,), (41.,),)) # Static shape. - with self.assertRaisesRegexp(ValueError, 'labels shape'): + with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features={'x': np.array(((42.,),))}, mode=model_fn.ModeKeys.EVAL, @@ -1692,7 +1692,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase): values_1d = np.array(((43.,), (44.,),)) # Static shape. - with self.assertRaisesRegexp(ValueError, 'labels shape'): + with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features={'x': values_1d}, mode=model_fn.ModeKeys.EVAL, @@ -1737,7 +1737,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase): values_1d = np.array(((43.,), (44.,),)) # Static shape. - with self.assertRaisesRegexp(ValueError, 'labels shape'): + with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features={'x': values_1d}, mode=model_fn.ModeKeys.TRAIN, -- GitLab From 73d796423348347702d43b498257f34e41fba367 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Fri, 1 Sep 2017 09:17:43 -0700 Subject: [PATCH 158/294] Rollback update-ability of dataflow and alias analysis added in cl/164923041 and cl/64778750. It did not scale as intended to large graphs when used in copy insertion. This change also includes some simplification and performance improvements to dataflow and alias analysis. Also add some value-ordering tests to HloOrderingTest using dataflow analysis to generate values. PiperOrigin-RevId: 167283460 --- tensorflow/compiler/xla/service/BUILD | 1 + .../xla/service/hlo_alias_analysis.cc | 601 ++++++++---------- .../compiler/xla/service/hlo_alias_analysis.h | 51 +- .../xla/service/hlo_alias_analysis_test.cc | 144 +---- tensorflow/compiler/xla/service/hlo_buffer.cc | 16 - tensorflow/compiler/xla/service/hlo_buffer.h | 15 +- .../xla/service/hlo_dataflow_analysis.cc | 588 +++++------------ .../xla/service/hlo_dataflow_analysis.h | 96 +-- .../xla/service/hlo_dataflow_analysis_test.cc | 328 +--------- .../compiler/xla/service/hlo_ordering_test.cc | 89 +++ tensorflow/compiler/xla/service/hlo_value.cc | 6 - tensorflow/compiler/xla/service/hlo_value.h | 3 + 12 files changed, 576 insertions(+), 1362 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 9d4e7fc254..610c611eee 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -849,6 +849,7 @@ cc_test( srcs = ["hlo_ordering_test.cc"], deps = [ ":hlo", + ":hlo_dataflow_analysis", ":hlo_ordering", ":hlo_scheduling", "//tensorflow/compiler/xla:shape_util", diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index 0beea42379..3dd8ac6dc5 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -37,6 +37,230 @@ namespace xla { using ::tensorflow::strings::StrAppend; using ::tensorflow::strings::StrCat; +// Data structure used to construct the alias analysis. Thrown away after alias +// analysis is complete. This data structure keeps track of which sets of +// HloValues must be in the same HloBuffer. This is maintained as a map from a +// buffer identifier (BufferNumber) to set of HLoValues. +// +// Initially each value is its own buffer. In MergeAliasedBuffers, sets of +// values which must share the same buffer are merged together. The end result +// is a partitioning of all HloValues into sets where each set needs its own +// HloBuffer. By performing this analysis without constructing HloBuffers on the +// fly, we can after-the-fact construct a vector of contiguously numbered +// HloBuffers after the buffer requirement has been determined. +class BufferValueMap { + public: + // A unique identifier for a set of colocated values which must share the same + // buffer. This is not necessarily the same as the HloBuffer::Id which will + // ultimately contain the values. The reason is that HloBuffer::Id's are + // contiguous, while BufferNumbers may not be. BufferNumbers may not be + // dense because buffers may be created and destroyed during the analysis + // construction process. + using BufferNumber = int64; + + explicit BufferValueMap(const HloDataflowAnalysis& dataflow) + : dataflow_(dataflow) { + buffers_.reserve(dataflow_.values().size()); + value_to_buffer_number_.reserve(dataflow_.values().size()); + for (const HloValue* value : dataflow_.values()) { + BufferNumber buffer_number = next_buffer_number_++; + buffers_[buffer_number].insert(value); + value_to_buffer_number_[value] = buffer_number; + } + } + + // Merge together sets of HloValues which must be in the same HloBuffer + // because of aliasing rules (eg, in-place kWhile instruction). + void MergeAliasedBuffers() { + for (const HloValue* value : dataflow_.values()) { + VLOG(3) << "Merging colocated values, value: " << value->ToShortString(); + + // Gather the set of buffers with aliasing rules (eg, kWhile) which this + // value must be contained in. + std::vector aliased_buffers = ComputeAliasedBuffers(*value); + + BufferNumber current_buffer = value_to_buffer_number_.at(value); + if (aliased_buffers.empty()) { + // The buffer containing 'value' aliases no other buffers. If the buffer + // containing 'value' already only contains 'value', then no change is + // necessary. If the buffer containing 'value' does contain other + // values, then remove 'value' from the buffer and create a new buffer + // containing only 'value' + if (buffers_.at(current_buffer).size() == 1) { + CHECK_EQ(*buffers_.at(current_buffer).begin(), value); + } else { + MoveValueToNewBuffer(*value); + } + } else { + // If multiple buffers are aliased merge these buffers together into a + // single buffer (arbitrarily chosen as the first buffer in the vector). + if (aliased_buffers.size() > 1) { + for (int64 i = 1; i < aliased_buffers.size(); ++i) { + MergeBuffers(/*from=*/aliased_buffers[i], + /*to=*/aliased_buffers[0]); + } + } + BufferNumber new_buffer = aliased_buffers[0]; + if (current_buffer != new_buffer) { + MoveValueToBuffer(*value, new_buffer); + } + } + } + } + + // Compute and return a sorted vector of all BufferNumbers. Can be used to + // iterate through all buffers stabily. + std::vector ComputeSortedBufferNumbers() const { + std::vector buffer_numbers; + for (const auto& pair : buffers_) { + buffer_numbers.push_back(pair.first); + } + std::sort(buffer_numbers.begin(), buffer_numbers.end()); + return buffer_numbers; + } + + // Return a set of all the values in the given buffer. + const tensorflow::gtl::FlatSet& GetValuesInBuffer( + BufferNumber buffer_number) const { + return buffers_.at(buffer_number); + } + + private: + // Create a new buffer. + void NewBuffer(const HloValue& value) { + BufferNumber buffer_number = next_buffer_number_++; + buffers_[buffer_number].insert(&value); + value_to_buffer_number_[&value] = buffer_number; + } + + // Move the given value into a new buffer containing only the value. + void MoveValueToNewBuffer(const HloValue& value) { + BufferNumber new_buffer_number = next_buffer_number_++; + buffers_[new_buffer_number]; + MoveValueToBuffer(value, new_buffer_number); + } + + // Move the given value into the given buffer. + void MoveValueToBuffer(const HloValue& value, BufferNumber buffer_number) { + BufferNumber old_buffer_number = value_to_buffer_number_.at(&value); + buffers_.at(old_buffer_number).erase(&value); + if (buffers_.at(old_buffer_number).empty()) { + buffers_.erase(old_buffer_number); + } + + buffers_.at(buffer_number).insert(&value); + value_to_buffer_number_.at(&value) = buffer_number; + } + + // Merge the buffer 'from' into the buffer 'to'. + void MergeBuffers(BufferNumber from, BufferNumber to) { + auto& from_value_set = buffers_.at(from); + buffers_.at(to).insert(from_value_set.begin(), from_value_set.end()); + // NOTE: using a union-find algorithm to hold the colocated values might be + // faster. + for (const HloValue* value : from_value_set) { + value_to_buffer_number_.at(value) = to; + } + buffers_.erase(from); + } + + BufferNumber GetBufferForValue(const HloValue& value) { + return value_to_buffer_number_.at(&value); + } + + // Compute and return a vector of buffers that the given value must be + // contained in due to HLO aliasing rules. + std::vector ComputeAliasedBuffers(const HloValue& value) { + // Value is init of a while (use is while). + std::vector aliased_buffers; + for (const HloUse& use : value.uses()) { + VLOG(1) << "use of value " << value.ToShortString() << ": " << use; + if (use.instruction->opcode() == HloOpcode::kWhile) { + // Determine the while value that this shares a buffer with. + const HloValue& while_value = + dataflow_.GetUniqueValueAt(use.instruction, use.operand_index); + aliased_buffers.push_back(GetBufferForValue(while_value)); + VLOG(3) << " value is init value to a while; must share buffer with " + "while value " + << while_value.ToShortString(); + } + } + + // Value is a parameter of a while body/condition. + if (value.defining_instruction()->opcode() == HloOpcode::kParameter) { + const HloComputation* computation = + value.defining_instruction()->parent(); + const CallGraphNode& call_graph_node = + dataflow_.call_graph().GetNode(computation); + for (const CallSite& callsite : call_graph_node.caller_callsites()) { + if (callsite.instruction()->opcode() == HloOpcode::kWhile) { + // Call graph must have been flattened. + CHECK_EQ(call_graph_node.caller_callsites().size(), 1); + + const HloValue& while_value = dataflow_.GetUniqueValueAt( + callsite.instruction(), value.defining_index()); + VLOG(3) << " value is parameter value of the body or condition of a " + "while; must share buffer with while value " + << while_value.ToShortString(); + aliased_buffers.push_back(GetBufferForValue(while_value)); + } + } + } + + // Value is the root of a while body. + for (const HloPosition& position : value.positions()) { + const HloComputation* computation = position.instruction->parent(); + const CallGraphNode& call_graph_node = + dataflow_.call_graph().GetNode(computation); + if (position.instruction == computation->root_instruction()) { + for (const CallSite& callsite : call_graph_node.caller_callsites()) { + if (callsite.instruction()->opcode() == HloOpcode::kWhile && + callsite.instruction()->while_body() == computation) { + // Call graph must have been flattened. + CHECK_EQ(call_graph_node.caller_callsites().size(), 1); + + const HloValue& while_value = dataflow_.GetUniqueValueAt( + callsite.instruction(), position.index); + VLOG(3) << " value is root the body computation of a while; must " + "share buffer with while value " + << while_value.ToShortString(); + aliased_buffers.push_back(GetBufferForValue(while_value)); + } + } + } + } + + // Value is the output of the while instruction itself. + if (value.defining_instruction()->opcode() == HloOpcode::kWhile) { + VLOG(3) << " value is output of a while instruction"; + aliased_buffers.push_back(GetBufferForValue(value)); + } + + // Uniquify aliased buffers. + std::sort(aliased_buffers.begin(), aliased_buffers.end()); + aliased_buffers.erase( + std::unique(aliased_buffers.begin(), aliased_buffers.end()), + aliased_buffers.end()); + + return aliased_buffers; + } + + // Dataflow analysis used to construct the buffer map. + const HloDataflowAnalysis& dataflow_; + + // A map containing the set of values contained in each buffer. + tensorflow::gtl::FlatMap> + buffers_; + + // A map indicating which buffer each value is contained in. + tensorflow::gtl::FlatMap + value_to_buffer_number_; + + // The buffer number of the next buffer to be created. + BufferNumber next_buffer_number_ = 0; +}; + HloAliasAnalysis::HloAliasAnalysis(HloModule* module) : module_(module) {} const HloBuffer& HloAliasAnalysis::GetUniqueBufferAt( @@ -99,10 +323,11 @@ bool HloAliasAnalysis::InstructionBuffersAreDistinct( } } else { // It's possible for multiple values at this index to have the same - // HloBuffer. This does not result in non-distictness. To account for this - // case, add all of the buffers at this index after checking whether each - // buffer exists at an earlier index. This is a corner case, however, as - // the number of values at an index is almost always one. + // HloBuffer. This does not result in non-distictness. To account for + // this case, add all of the buffers at this index after checking + // whether each buffer exists at an earlier index. This is a corner + // case, however, as the number of values at an index is almost always + // one. std::vector buffers_at_this_index; for (const HloValue* value : value_set.values()) { const HloBuffer* buffer = &GetBufferContainingValue(*value); @@ -118,15 +343,6 @@ bool HloAliasAnalysis::InstructionBuffersAreDistinct( return true; } -void HloAliasAnalysis::InitializeBufferSets() { - // Initially define a buffer for every HloValue in the module. - for (const HloValue& value : dataflow_analysis_->values()) { - HloBuffer& buffer = NewHloBuffer(); - buffer.AddValue(value); - value_to_buffer_[&value] = &buffer; - } -} - Status HloAliasAnalysis::Verify() const { // Verify consistency between the value_to_buffer_ map and // HloBuffer::values(). @@ -137,9 +353,8 @@ Status HloAliasAnalysis::Verify() const { value) != buffer.values().end()); } - for (const auto& pair : buffers_) { - const HloBuffer::Id id = pair.first; - const HloBuffer& buffer = pair.second; + for (HloBuffer::Id id = 0; id < buffers_.size(); ++id) { + const HloBuffer& buffer = buffers_[id]; TF_RET_CHECK(buffer.id() == id); HloValue::Id last_value_id = -1; @@ -152,116 +367,9 @@ Status HloAliasAnalysis::Verify() const { } } - if (!buffers_vector_.empty()) { - // buffers_vector_ should be a vector of all HloBuffers sorted by id. - std::vector buffers; - for (const auto& id_buffer : buffers_) { - buffers.push_back(&id_buffer.second); - } - std::sort(buffers.begin(), buffers.end(), HloBuffer::IdLessThan); - TF_RET_CHECK(buffers_vector_ == buffers); - } - - return Status::OK(); -} - -Status HloAliasAnalysis::VerifyAgainstReference() const { - TF_RETURN_IF_ERROR(Verify()); - - TF_ASSIGN_OR_RETURN(std::unique_ptr reference, - Run(module_)); - TF_RETURN_IF_ERROR(reference->Verify()); - - VLOG(2) << "This analysis:"; - XLA_VLOG_LINES(2, ToString()); - VLOG(2) << "Reference:"; - XLA_VLOG_LINES(2, reference->ToString()); - - // Create map from HloValue in the reference analysis to HloValue in this - // analysis and vice versa. - tensorflow::gtl::FlatMap reference_to_this; - tensorflow::gtl::FlatMap this_to_reference; - for (const HloValue& value : dataflow_analysis().values()) { - const HloValue& reference_value = - reference->dataflow_analysis().GetValueDefinedAt( - value.defining_instruction(), value.defining_index()); - reference_to_this[&reference_value] = &value; - this_to_reference[&value] = &reference_value; - } - - TF_RET_CHECK(buffers_.size() == reference->buffers_.size()) - << "Different number of buffers (" << buffers_.size() - << " != " << reference->buffers_.size() << ")"; - for (const auto& pair : reference->buffers_) { - const HloBuffer& reference_buffer = pair.second; - - // Find the corresponding buffer in the reference by taking the first value - // in the buffer, finding the corresponding value in the reference, and then - // finding the buffer holding that value. - TF_RET_CHECK(!reference_buffer.values().empty()); - const HloValue* reference_value = reference_buffer.values()[0]; - const HloValue* value = reference_to_this.at(reference_value); - const HloBuffer& buffer = GetBufferContainingValue(*value); - - // The buffer and the reference should have the exact same values. To make - // comparison easy, sort the values in the reference buffer identically to - // the values in the non-reference buffer (ie, by the corresponding id of - // the non-reference value). - std::vector reference_values = reference_buffer.values(); - std::sort(reference_values.begin(), reference_values.end(), - [&reference_to_this](const HloValue* a, const HloValue* b) { - return reference_to_this.at(a)->id() < - reference_to_this.at(b)->id(); - }); - TF_RET_CHECK(reference_values.size() == buffer.values().size()); - for (int i = 0; i < buffer.values().size(); ++i) { - TF_RET_CHECK(*reference_values[i] == *buffer.values()[i]) - << "Buffer:\n " << buffer - << "\ndoes not have the same values as reference buffer:\n " - << reference_buffer; - } - } - return Status::OK(); } -HloBuffer& HloAliasAnalysis::NewHloBuffer() { - HloBuffer::Id buffer_id = next_buffer_id_++; - auto emplaced = buffers_.emplace(std::piecewise_construct, - std::forward_as_tuple(buffer_id), - std::forward_as_tuple(buffer_id)); - CHECK(emplaced.second); - - buffers_vector_.clear(); - - return emplaced.first->second; -} - -void HloAliasAnalysis::MoveValueToNewBuffer(const HloValue& value) { - HloBuffer& new_buffer = NewHloBuffer(); - MoveValueToBuffer(value, &new_buffer); - - VLOG(3) << "Moved value " << value.ToShortString() << " into new buffer " - << new_buffer.id(); -} - -void HloAliasAnalysis::MoveValueToBuffer(const HloValue& value, - HloBuffer* buffer) { - HloBuffer& old_buffer = GetBufferContainingValue(value); - CHECK_NE(buffer, &old_buffer); - VLOG(3) << "Moved value " << value.ToShortString() << " from buffer " - << old_buffer.id() << " into buffer " << buffer->id(); - old_buffer.RemoveValue(value); - if (old_buffer.values().empty()) { - VLOG(3) << "Buffer " << old_buffer.id() << " now empty. Removing."; - buffers_.erase(old_buffer.id()); - buffers_vector_.clear(); - } - - buffer->AddValue(value); - value_to_buffer_[&value] = buffer; -} - string HloAliasAnalysis::ToString() const { string out = StrCat("HloAliasAnalysis, module ", module_->name(), "\n"); StrAppend(&out, " Buffers at each position:\n"); @@ -290,10 +398,10 @@ string HloAliasAnalysis::ToString() const { } StrAppend(&out, " Buffers:\n"); - for (const HloBuffer* buffer : buffers()) { - StrAppend(&out, " ", buffer->ToString(), "\n"); + for (const HloBuffer& buffer : buffers()) { + StrAppend(&out, " ", buffer.ToString(), "\n"); StrAppend(&out, " positions:\n"); - for (const HloPosition& position : buffer->ComputePositions()) { + for (const HloPosition& position : buffer.ComputePositions()) { StrAppend(&out, " ", position.ToString(), "\n"); } } @@ -301,217 +409,6 @@ string HloAliasAnalysis::ToString() const { return out; } -const std::vector& HloAliasAnalysis::buffers() const { - if (buffers_vector_.empty()) { - // Lazily construct vector of buffers. - buffers_vector_.reserve(buffers_.size()); - for (auto& pair : buffers_) { - buffers_vector_.push_back(&pair.second); - } - std::sort(buffers_vector_.begin(), buffers_vector_.end(), - HloBuffer::IdLessThan); - } else { - CHECK_EQ(buffers_vector_.size(), buffers_.size()); - for (const HloBuffer* buffer : buffers_vector_) { - DCHECK(ContainsKey(buffers_, buffer->id())); - DCHECK(&GetBuffer(buffer->id()) == buffer); - } - } - return buffers_vector_; -} - -void HloAliasAnalysis::UpdateAtInstructions( - tensorflow::gtl::ArraySlice instructions) { - VLOG(4) << "Updated HLO module:"; - XLA_VLOG_LINES(4, module_->ToString()); - - VLOG(3) << "Before update:"; - XLA_VLOG_LINES(3, ToString()); - - std::vector values_to_update; - for (const HloInstruction* instruction : instructions) { - for (auto& pair : dataflow_analysis().GetInstructionValueSet(instruction)) { - for (const HloValue* value : pair.second.values()) { - values_to_update.push_back(value); - } - } - } - - UpdateBuffersForValues(values_to_update); - - VLOG(3) << "After update:"; - XLA_VLOG_LINES(3, ToString()); -} - -void HloAliasAnalysis::UpdateAfterChangingOperand(HloInstruction* instruction, - HloInstruction* old_operand, - HloInstruction* new_operand) { - VLOG(1) << "UpdateAfterChangingOperand(" << instruction->name() << ", " - << old_operand->name() << " => " << new_operand->name() << ")"; - - dataflow_analysis_->UpdateAfterChangingOperand(instruction, old_operand, - new_operand); - TF_DCHECK_OK(dataflow_analysis_->VerifyAgainstReference()); - - VLOG(4) << "Updated dataflow:"; - XLA_VLOG_LINES(4, dataflow_analysis_->ToString()); - - UpdateAtInstructions({instruction, old_operand, new_operand}); -} - -void HloAliasAnalysis::UpdateAfterChangingRoot(HloInstruction* old_root, - HloInstruction* new_root) { - VLOG(1) << "UpdateAfterChangingRoot(" << old_root->name() << " => " - << new_root->name() << ")"; - - dataflow_analysis_->UpdateAfterChangingRoot(old_root, new_root); - TF_DCHECK_OK(dataflow_analysis_->VerifyAgainstReference()); - - VLOG(4) << "Updated dataflow:"; - XLA_VLOG_LINES(4, dataflow_analysis_->ToString()); - - UpdateAtInstructions({old_root, new_root}); -} - -std::vector HloAliasAnalysis::ComputeAliasedBuffers( - const HloValue& value) { - std::vector aliased_buffers; - - // Value is init of a while (use is while). - for (const HloUse& use : value.uses()) { - VLOG(1) << "use of value " << value.ToShortString() << ": " << use; - if (use.instruction->opcode() == HloOpcode::kWhile) { - // Determine the while value that this shares a buffer with. - const HloValue& while_value = dataflow_analysis().GetUniqueValueAt( - use.instruction, use.operand_index); - aliased_buffers.push_back(&GetBufferContainingValue(while_value)); - VLOG(3) << " value is init value to a while; must share buffer with " - "while value " - << while_value.ToShortString(); - } - } - - // Value is a parameter of a while body/condition. - if (value.defining_instruction()->opcode() == HloOpcode::kParameter) { - const HloComputation* computation = value.defining_instruction()->parent(); - const CallGraphNode& call_graph_node = - dataflow_analysis().call_graph().GetNode(computation); - for (const CallSite& callsite : call_graph_node.caller_callsites()) { - if (callsite.instruction()->opcode() == HloOpcode::kWhile) { - // Call graph must have been flattened. - CHECK_EQ(call_graph_node.caller_callsites().size(), 1); - - const HloValue& while_value = dataflow_analysis().GetUniqueValueAt( - callsite.instruction(), value.defining_index()); - VLOG(3) << " value is parameter value of the body or condition of a " - "while; must share buffer with while value " - << while_value.ToShortString(); - aliased_buffers.push_back(&GetBufferContainingValue(while_value)); - } - } - } - - // Value is the root of a while body. - for (const HloPosition& position : value.positions()) { - const HloComputation* computation = position.instruction->parent(); - const CallGraphNode& call_graph_node = - dataflow_analysis().call_graph().GetNode(computation); - if (position.instruction == computation->root_instruction()) { - for (const CallSite& callsite : call_graph_node.caller_callsites()) { - if (callsite.instruction()->opcode() == HloOpcode::kWhile && - callsite.instruction()->while_body() == computation) { - // Call graph must have been flattened. - CHECK_EQ(call_graph_node.caller_callsites().size(), 1); - - // If the value appears in the root of a while body, then - // necessarily the value is defined in the body as well. - CHECK_EQ(value.defining_instruction()->parent(), computation); - - const HloValue& while_value = dataflow_analysis().GetUniqueValueAt( - callsite.instruction(), position.index); - VLOG(3) << " value is root the body computation of a while; must " - "share buffer with while value " - << while_value.ToShortString(); - aliased_buffers.push_back(&GetBufferContainingValue(while_value)); - } - } - } - } - - // Value is in the while instruction itself. - if (value.defining_instruction()->opcode() == HloOpcode::kWhile) { - VLOG(3) << " value is output of a while instruction"; - aliased_buffers.push_back(&GetUniqueBufferAt(value.defining_instruction(), - value.defining_index())); - } - - // Uniquify aliased buffers. - std::sort(aliased_buffers.begin(), aliased_buffers.end(), - HloBuffer::IdLessThan); - aliased_buffers.erase( - std::unique(aliased_buffers.begin(), aliased_buffers.end()), - aliased_buffers.end()); - - return aliased_buffers; -} - -// This method recomputes the HloBuffer for each of the given HloValues. The -// method does not necessarily update the HloBuffer of values which share a -// buffer with the given values, but are not explicitly passed in -// 'values'. Therefore, the caller must pass in all values which may require an -// update according to the kind of HLO graph change which occurred: operand -// changed (UpdateAfterChangingOperand), or root of computation changed -// (UpdateAfterChangingRoot). -void HloAliasAnalysis::UpdateBuffersForValues( - tensorflow::gtl::ArraySlice values) { - for (const HloValue* value : values) { - VLOG(3) << "Updating buffer for value: " << value->ToShortString(); - - // Gather the set of buffer with aliasing rules (eg, kWhile) which this - // value must be contained in due. - std::vector aliased_buffers = ComputeAliasedBuffers(*value); - - HloBuffer& current_buffer = GetBufferContainingValue(*value); - if (aliased_buffers.empty()) { - // The buffer containing 'value' aliases no other buffers. If the buffer - // containing 'value' already only contains 'value', then no change is - // necessary. If the buffer containing 'value' does contain other values, - // then remove 'value' from the buffer and create a new buffer containing - // only 'value' - if (current_buffer.values().size() == 1) { - CHECK_EQ(current_buffer.values()[0], value); - } else { - MoveValueToNewBuffer(*value); - } - } else { - // If multiple buffers are aliased merge these buffers together into a - // single buffer (arbitrarily chosen as the first buffer in the vector). - if (aliased_buffers.size() > 1) { - for (int64 i = 1; i < aliased_buffers.size(); ++i) { - // Make copy of values vector because MoveValueToBuffer invalidates - // the values iterator. The could be done more efficiently by moving - // all values and once. - std::vector values = aliased_buffers[i]->values(); - for (const HloValue* value : values) { - MoveValueToBuffer(*value, aliased_buffers[0]); - } - } - aliased_buffers.resize(1); - } - - CHECK_EQ(aliased_buffers.size(), 1); - HloBuffer* new_buffer = aliased_buffers[0]; - - if (¤t_buffer != new_buffer) { - MoveValueToBuffer(*value, new_buffer); - } - } - - VLOG(4) << "Analysis after update:"; - XLA_VLOG_LINES(4, ToString()); - } -} - /* static */ StatusOr> HloAliasAnalysis::Run( HloModule* module) { @@ -524,18 +421,28 @@ StatusOr> HloAliasAnalysis::Run( HloDataflowAnalysis::Run(module, /*ssa_form=*/true, /*bitcast_defines_value=*/false)); - alias_analysis->InitializeBufferSets(); - - VLOG(3) << "After initialization:"; - XLA_VLOG_LINES(3, alias_analysis->ToString()); - - std::vector all_values; - for (const HloValue& value : alias_analysis->dataflow_analysis().values()) { - all_values.push_back(&value); + BufferValueMap buffer_map(alias_analysis->dataflow_analysis()); + buffer_map.MergeAliasedBuffers(); + + // Create a vector of HloBuffers, one for each set of values in the + // BufferValueMap. Create the HloBuffers as a vector of contiguously numbered + // buffers. + std::vector sorted_buffer_numbers = + buffer_map.ComputeSortedBufferNumbers(); + alias_analysis->buffers_.reserve(sorted_buffer_numbers.size()); + HloBuffer::Id next_id = 0; + for (BufferValueMap::BufferNumber buffer_number : sorted_buffer_numbers) { + auto& value_set = buffer_map.GetValuesInBuffer(buffer_number); + std::vector sorted_values(value_set.begin(), + value_set.end()); + std::sort(sorted_values.begin(), sorted_values.end(), HloValue::IdLessThan); + alias_analysis->buffers_.emplace_back(next_id++, sorted_values); + for (const HloValue* value : sorted_values) { + alias_analysis->value_to_buffer_[value] = + &alias_analysis->buffers_.back(); + } } - alias_analysis->UpdateBuffersForValues(all_values); - TF_DCHECK_OK(alias_analysis->Verify()); XLA_VLOG_LINES(1, alias_analysis->ToString()); diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.h b/tensorflow/compiler/xla/service/hlo_alias_analysis.h index 1b538f6d1c..39554e4664 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.h @@ -74,7 +74,7 @@ class HloAliasAnalysis { // Return a vector of all HloBuffers stabily sorted by HloBuffer::Id. This // vector is lazily computed. Mutating operations on HloAliasAnalysis may // invalidate the underlying vector requiring recomputation. - const std::vector& buffers() const; + const std::vector& buffers() const { return buffers_; } // Returns the underlying dataflow analysis used by this alias analysis. const HloDataflowAnalysis& dataflow_analysis() const { @@ -90,50 +90,13 @@ class HloAliasAnalysis { // output of the given instruction. bool InstructionBuffersAreDistinct(const HloInstruction* instruction) const; - // Updates the analysis after the operands of 'instruction' have changed or if - // 'instruction' has been made the root of a computation. Analysis update is - // not possible if instructions have been added or removed from the graph. - void UpdateAfterChangingOperand(HloInstruction* instruction, - HloInstruction* old_operand, - HloInstruction* new_operand); - void UpdateAfterChangingRoot(HloInstruction* old_root, - HloInstruction* new_root); - // Compare the dataflow analysis against a clean recomputation of the // analysis. Returns an error status if there is a mismatch. Useful for // verifying the correctness after updates to the analysis. Status VerifyAgainstReference() const; protected: - HloAliasAnalysis(HloModule* module); - - // Create a new empty HloBuffer. - HloBuffer& NewHloBuffer(); - - // Move the given value to the given buffer. The value is removed from it's - // current buffer. - void MoveValueToBuffer(const HloValue& value, HloBuffer* buffer); - - // Move the given value to a newly created buffer. The value is removed from - // it's current buffer. - void MoveValueToNewBuffer(const HloValue& value); - - // Construct the initial set of buffer sets where an HloBuffer is created for - // each HloValue in the module. - void InitializeBufferSets(); - - // Compute and return the buffers with aliasing rules (eg, kWhile) which the - // given value must be contained in. - std::vector ComputeAliasedBuffers(const HloValue& value); - - // Recompute the HloBuffers for the given values. - void UpdateBuffersForValues( - tensorflow::gtl::ArraySlice values); - - // Recompute the HloBuffers for all the values which appear in the output of - // the given instructions. - void UpdateAtInstructions( - tensorflow::gtl::ArraySlice instructions); + explicit HloAliasAnalysis(HloModule* module); // Verify various invariants of the alias analysis. Status Verify() const; @@ -143,20 +106,12 @@ class HloAliasAnalysis { // The underlying dataflow analysis used by this alias analysis. std::unique_ptr dataflow_analysis_; - // The map of all HloBuffers in the module. We pass around pointers to the - // mapped HloBuffers, so the underlying container must keep them valid despite - // mutations touching other map entries. - std::unordered_map buffers_; - // A map indicating which buffer a value is contained in. tensorflow::gtl::FlatMap value_to_buffer_; // A lazily constructed vector containing all HloBuffers sorted by // HloBuffer::Id. - mutable std::vector buffers_vector_; - - // The Id to use for the next HloBuffer. - int64 next_buffer_id_ = 0; + std::vector buffers_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc index e2815d6e64..6e311e25fb 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc @@ -87,14 +87,13 @@ class HloAliasAnalysisTest : public HloTestBase { // constructed. bool AnyValuesInSameBufferInterfere() { DependencyHloOrdering ordering(module_.get()); - for (const HloBuffer* buffer : analysis_->buffers()) { - for (const HloValue* value_a : buffer->values()) { - for (const HloValue* value_b : buffer->values()) { + for (const HloBuffer& buffer : analysis_->buffers()) { + for (const HloValue* value_a : buffer.values()) { + for (const HloValue* value_b : buffer.values()) { if (*value_a != *value_b && - analysis_->dataflow_analysis().MayInterfere(*value_a, *value_b, - ordering)) { + ordering.MayInterfere(*value_a, *value_b)) { VLOG(1) << *value_a << " interferes with " << *value_b - << " in buffer: " << *buffer; + << " in buffer: " << buffer; return true; } } @@ -384,10 +383,7 @@ TEST_F(HloAliasAnalysisTest, SingleWhile) { EXPECT_THAT( GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{0})), - UnorderedElementsAre(GetValueDefinedAt(xla_while, /*index=*/{0}), - GetValueDefinedAt(body_param, /*index=*/{0}), - GetValueDefinedAt(cond_param, /*index=*/{0}), - GetValueDefinedAt(constant1))); + UnorderedElementsAre(GetValueDefinedAt(constant1))); EXPECT_THAT( GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})), UnorderedElementsAre(GetValueDefinedAt(constant2), @@ -631,9 +627,9 @@ TEST_F(HloAliasAnalysisTest, SwizzlingWhile) { // HloBuffers. EXPECT_THAT( analysis.buffers(), - UnorderedElementsAre(&analysis.GetUniqueBufferAt(constant1), - &analysis.GetUniqueBufferAt(tuple, /*index=*/{}), - &analysis.GetUniqueBufferAt(cond_constant))); + UnorderedElementsAre(analysis.GetUniqueBufferAt(constant1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{}), + analysis.GetUniqueBufferAt(cond_constant))); // The tuple elements of the while and the three constant inputs should all be // smooshed into the same buffer. @@ -820,127 +816,5 @@ TEST_F(HloAliasAnalysisTest, Bitcast) { analysis.GetUniqueBufferAt(bitcast)); } -TEST_F(HloAliasAnalysisTest, UpdateAnalysisForWhile) { - // Test updating alias analysis after modifying a module with an array shaped - // while: - // - // body(F32[] %param): - // %negate = Negate(%param) - // - // condition(F32[] %param): - // return Constant(false) - // - // entry: - // %constant = Constant(1.0) - // %exp = Exp(%constant) - // return While(%exp, body, condition) - // - auto body_builder = HloComputation::Builder("body"); - auto body_param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); - auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary( - scalar_shape_, HloOpcode::kNegate, body_param)); - HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build()); - - // Condition computation trivially returns a constant "false". - auto cond_builder = HloComputation::Builder("condition"); - auto cond_param = cond_builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); - cond_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(false))); - HloComputation* condition = - module_->AddEmbeddedComputation(cond_builder.Build()); - - auto builder = HloComputation::Builder(TestName()); - auto constant = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1.0))); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kExp, constant)); - auto xla_while = builder.AddInstruction( - HloInstruction::CreateWhile(scalar_shape_, condition, body, exp)); - module_->AddEntryComputation(builder.Build()); - - HloAliasAnalysis& analysis = RunAnalysis(); - - // Sanity check some alias information. - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(body_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(cond_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(negate)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(xla_while)); - - // Set the body root to the body_param. Previously it was Negate(body_param). - body->set_root_instruction(body_param); - - // Prior to updating, verify that the analysis is no longer valid. - Status verify_status = analysis.VerifyAgainstReference(); - EXPECT_FALSE(verify_status.ok()); - - analysis.UpdateAfterChangingRoot(/*old_root=*/negate, - /*new_root*/ body_param); - - // Analysis should be valid after the update. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - // The exponential should now pass through the body transparently. - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(body_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(cond_param)); - EXPECT_NE(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(negate)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(xla_while)); - - // Now replace the operand of the while with %constant (was %exp). - TF_ASSERT_OK(exp->ReplaceUseWith(xla_while, constant)); - analysis.UpdateAfterChangingOperand(xla_while, /*old_operand=*/exp, - /*new_operand=*/constant); - - // Analysis should be valid after the update. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - EXPECT_EQ(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(body_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(cond_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(xla_while)); - EXPECT_NE(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(exp)); - EXPECT_NE(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(negate)); - - // And finally make the negate the root of the body again. - body->set_root_instruction(negate); - analysis.UpdateAfterChangingRoot(/*old_root=*/body_param, - /*new_root*/ negate); - - // Analysis should be valid after the update. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - EXPECT_EQ(analysis.GetUniqueBufferAt(negate), - analysis.GetUniqueBufferAt(body_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(negate), - analysis.GetUniqueBufferAt(cond_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(negate), - analysis.GetUniqueBufferAt(xla_while)); - EXPECT_EQ(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(negate)); - - auto value_of = [&analysis](const HloInstruction* instruction) { - return &analysis.dataflow_analysis().GetValueDefinedAt(instruction); - }; - EXPECT_THAT(analysis.GetUniqueBufferAt(negate).values(), - UnorderedElementsAre(value_of(body_param), value_of(cond_param), - value_of(negate), value_of(constant), - value_of(xla_while))); -} - -// Test update tuple element. - } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_buffer.cc b/tensorflow/compiler/xla/service/hlo_buffer.cc index 2bfdd9156a..e16413f361 100644 --- a/tensorflow/compiler/xla/service/hlo_buffer.cc +++ b/tensorflow/compiler/xla/service/hlo_buffer.cc @@ -36,22 +36,6 @@ namespace xla { using ::tensorflow::str_util::Join; using ::tensorflow::strings::StrCat; -void HloBuffer::AddValue(const HloValue& value) { - values_.push_back(&value); - // Sort vector and remove duplicates. - std::sort(values_.begin(), values_.end(), HloValue::IdLessThan); - values_.erase(std::unique(values_.begin(), values_.end(), HloValue::IdEqual), - values_.end()); -} - -void HloBuffer::RemoveValue(const HloValue& value) { - // The values are sorted, so finding the value could be done in log(n) time - // with a binary search. - auto it = std::find(values_.begin(), values_.end(), &value); - CHECK(it != values_.end()); - values_.erase(it); -} - bool HloBuffer::operator==(const HloBuffer& other) const { bool equal = id() == other.id(); if (equal) { diff --git a/tensorflow/compiler/xla/service/hlo_buffer.h b/tensorflow/compiler/xla/service/hlo_buffer.h index cb961e1601..4873463b2e 100644 --- a/tensorflow/compiler/xla/service/hlo_buffer.h +++ b/tensorflow/compiler/xla/service/hlo_buffer.h @@ -84,22 +84,15 @@ class HloBuffer { return a->id() == b->id(); } - HloBuffer(Id id) : id_(id) {} + HloBuffer(Id id, tensorflow::gtl::ArraySlice values) + : id_(id), values_(values.begin(), values.end()) {} // Return the unique identifier for this HloBuffer. Id id() const { return id_; } - // Add a value to the set of values held by this buffer. Also adds the - // HloPositions of the value to the positions vector of the buffer. If the - // buffer already contains this value, then this method is a nop. - void AddValue(const HloValue& value); - void RemoveValue(const HloValue& value); - // Return all values contained in this buffer. const std::vector& values() const { return values_; } - std::vector ComputePositions() const; - // Return the unique HLO value in the buffer. CHECK fails if the buffer does // not contain exactly one value. const HloValue& GetUniqueValue() const { @@ -107,6 +100,8 @@ class HloBuffer { return *values_[0]; } + std::vector ComputePositions() const; + string ToString() const; bool operator==(const HloBuffer& other) const; @@ -118,7 +113,7 @@ class HloBuffer { // The set of values contained in this buffer. Vector contains no duplicates // and is sorted stably by HloValue::Id. - std::vector values_; + const std::vector values_; }; std::ostream& operator<<(std::ostream& out, const HloBuffer& buffer); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index ea8b239e10..2be1645f1b 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -67,6 +67,22 @@ HloValue& HloDataflowAnalysis::GetValueDefinedAt( return GetUniqueValueAt(instruction, index); } +HloValue* HloDataflowAnalysis::NewHloValue(HloInstruction* instruction, + const ShapeIndex& index, + bool is_phi) { + const int64 value_id = next_value_id_++; + auto emplaced = values_.emplace( + std::piecewise_construct, std::forward_as_tuple(value_id), + std::forward_as_tuple(value_id, instruction, index, is_phi)); + CHECK(emplaced.second); + + return &emplaced.first->second; +} + +void HloDataflowAnalysis::DeleteHloValue(HloValue::Id value_id) { + values_.erase(value_id); +} + string HloDataflowAnalysis::ToString() const { string out = StrCat("HloDataflowAnalysis, module ", module_->name(), "\n"); StrAppend(&out, " Instruction value sets:\n"); @@ -99,20 +115,96 @@ string HloDataflowAnalysis::ToString() const { } } StrAppend(&out, " HloValues:\n"); - for (const HloValue& value : values()) { - StrAppend(&out, value.ToString(/*indent=*/4)); + for (const HloValue* value : values()) { + StrAppend(&out, value->ToString(/*indent=*/4)); + } + return out; +} + +bool HloDataflowAnalysis::Phi( + HloInstruction* instruction, + tensorflow::gtl::ArraySlice inputs) { + CHECK(ssa_form_); + + for (const InstructionValueSet* input : inputs) { + DCHECK(ShapeUtil::Compatible(instruction->shape(), input->shape())); } - StrAppend(&out, " Phi resolutions:\n"); - for (const HloValue& value : values()) { - if (value.is_phi()) { - const HloValue* resolved_value = ResolvePhi(value); - StrAppend(&out, " ", value.ToShortString(), " => ", - resolved_value == nullptr ? "UNKNOWN" - : resolved_value->ToShortString(), - "\n"); + + bool changed = false; + for (auto& pair : GetInstructionValueSet(instruction)) { + const ShapeIndex& index = pair.first; + HloValueSet& value_set = pair.second; + + // Positions with phi values should never have more than one value in the + // value set. + CHECK_LE(value_set.values().size(), 1); + const HloValue* current_value = + value_set.values().size() == 1 ? value_set.values()[0] : nullptr; + + // Construct a vector of unique value IDs of the inputs. + std::vector input_value_ids; + for (const InstructionValueSet* input : inputs) { + for (const HloValue* value : input->element(index).values()) { + input_value_ids.push_back(value->id()); + } + } + std::sort(input_value_ids.begin(), input_value_ids.end()); + input_value_ids.erase( + std::unique(input_value_ids.begin(), input_value_ids.end()), + input_value_ids.end()); + + // Remove the existing phi value (if it exists). The phi can be its own + // input, for example, in while body parameters where the body passes + // through the parameter value. + bool current_value_defined_here = + (current_value != nullptr && + current_value->defining_instruction() == instruction && + current_value->defining_index() == index); + if (current_value_defined_here) { + CHECK(current_value->is_phi()); + auto it = std::find(input_value_ids.begin(), input_value_ids.end(), + current_value->id()); + if (it != input_value_ids.end()) { + input_value_ids.erase(it); + } + } + + if (input_value_ids.empty()) { + // A value set which has at least one element should never have its value + // set reduced to zero elements. During dataflow value sets only can go + // from empty to non-empty, not the reverse. + CHECK_EQ(value_set.values().size(), 0) + << "Instruction " << instruction->name() << " at index " << index + << " previously had non-empty value set. Value set: " << value_set; + } else if (input_value_ids.size() == 1) { + // Only a single value reaches this point. There should be no phi, and + // this value set should contain this single value. + const HloValue& new_value = GetValue(input_value_ids[0]); + if (current_value == nullptr) { + value_set.Clear(); + value_set.AddValue(&new_value); + changed = true; + } else if (current_value != &new_value) { + if (current_value_defined_here) { + // Remove the existing phi. + DeleteHloValue(current_value->id()); + } + value_set.Clear(); + value_set.AddValue(&new_value); + changed = true; + } + } else { + // Multiple distinct values reach this point. A phi value is + // necessary. + CHECK_GT(input_value_ids.size(), 1); + if (current_value == nullptr || !current_value->is_phi()) { + value_set.Clear(); + value_set.AddValue(NewHloValue(instruction, index, /*is_phi=*/true)); + changed = true; + } } } - return out; + return changed; } const HloValue& HloDataflowAnalysis::GetValue(HloValue::Id value_id) const { @@ -142,129 +234,6 @@ HloValueSet& HloDataflowAnalysis::GetValueSet(const HloPosition& position) { return GetValueSet(position.instruction, position.index); } -void HloDataflowAnalysis::UpdateAfterChangingOperand( - HloInstruction* instruction, HloInstruction* old_operand, - HloInstruction* new_operand) { - CHECK(std::find(instruction->operands().begin(), - instruction->operands().end(), - new_operand) != instruction->operands().end()); - VLOG(1) << "UpdateAfterChangingOperand(" << instruction->name() << ", " - << old_operand->name() << " => " << new_operand->name() << ")"; - - std::vector to_update = {instruction}; - - // If the instruction calls any computations then add the parameters of called - // computation to capture any changes to the dataflow into the subcomputation - // introduced by the new operand. - for (HloComputation* computation : instruction->called_computations()) { - to_update.insert(to_update.end(), - computation->parameter_instructions().begin(), - computation->parameter_instructions().end()); - } - - UpdateInstructionsAndPropagate(to_update); - - // The uses of the values in the old and new operand may have changed. Uses of - // other HloValues are updated in UpdateInstructionsAndPropagate. - for (auto& pair : GetInstructionValueSet(old_operand)) { - for (const HloValue* value : pair.second.values()) { - GetValue(value->id()).RecomputeUses(); - } - } - for (auto& pair : GetInstructionValueSet(new_operand)) { - for (const HloValue* value : pair.second.values()) { - GetValue(value->id()).RecomputeUses(); - } - } - - TF_DCHECK_OK(VerifyAgainstReference()); -} - -void HloDataflowAnalysis::UpdateAfterChangingRoot(HloInstruction* old_root, - HloInstruction* new_root) { - VLOG(1) << "UpdateAfterChangingRoot(" << old_root->name() << " => " - << new_root->name() << ")"; - - CHECK_EQ(new_root, new_root->parent()->root_instruction()); - CHECK_EQ(new_root->parent(), old_root->parent()); - - std::vector to_update = {old_root, new_root}; - - const CallGraphNode& call_graph_node = - call_graph_->GetNode(new_root->parent()); - for (const CallSite& callsite : call_graph_node.caller_callsites()) { - if (callsite.instruction()->opcode() == HloOpcode::kCall) { - to_update.push_back(callsite.instruction()); - } else if (callsite.instruction()->opcode() == HloOpcode::kWhile) { - // Add the while itself, and the body and condition parameters. - to_update.push_back(callsite.instruction()); - to_update.push_back( - callsite.instruction()->while_body()->parameter_instruction(0)); - to_update.push_back( - callsite.instruction()->while_condition()->parameter_instruction(0)); - } - } - - UpdateInstructionsAndPropagate(to_update); - - TF_DCHECK_OK(VerifyAgainstReference()); -} - -const HloValue* HloDataflowAnalysis::ResolvePhi(const HloValue& phi) const { - CHECK(phi.is_phi()); - - tensorflow::gtl::FlatSet visited; - std::queue worklist; - auto add_to_worklist = [&worklist, &visited](const HloValue* v) { - if (visited.insert(v).second) { - // 'v' was not previously in visited. - worklist.push(v); - } - }; - add_to_worklist(&phi); - - const HloValue* resolved_value = nullptr; - while (!worklist.empty()) { - const HloValue* value = worklist.front(); - worklist.pop(); - - if (!value->is_phi()) { - if (resolved_value == nullptr) { - resolved_value = value; - } else if (resolved_value != value) { - return nullptr; - } - } else { - for (const HloValue* input : phi_inputs_.at(value)) { - add_to_worklist(input); - } - } - } - return resolved_value; -} - -void HloDataflowAnalysis::UpdatePhiInputs( - const HloInstruction* instruction, - tensorflow::gtl::ArraySlice inputs) { - CHECK(ssa_form_); - for (auto& pair : GetInstructionValueSet(instruction)) { - const ShapeIndex& index = pair.first; - const HloValue& phi_value = GetUniqueValueAt(instruction, index); - auto& phi_inputs = phi_inputs_.at(&phi_value); - phi_inputs.clear(); - for (const InstructionValueSet* input : inputs) { - for (const HloValue* value : input->element(index).values()) { - // The number of phi inputs is typically 2, and virtually always very - // small. - if (std::find(phi_inputs.begin(), phi_inputs.end(), value) == - phi_inputs.end()) { - phi_inputs.push_back(value); - } - } - } - } -} - bool HloDataflowAnalysis::UpdateBitcastValueSet(HloInstruction* bitcast) { CHECK_EQ(bitcast->opcode(), HloOpcode::kBitcast); const InstructionValueSet& operand_set = @@ -380,8 +349,7 @@ bool HloDataflowAnalysis::UpdateParameterValueSet(HloInstruction* parameter) { } if (ssa_form_ && called_from_while) { - UpdatePhiInputs(parameter, inputs); - return false; + return Phi(parameter, inputs); } else { return GetInstructionValueSet(parameter).AssignUnionOf(inputs); } @@ -439,8 +407,7 @@ bool HloDataflowAnalysis::UpdateWhileValueSet(HloInstruction* xla_while) { &GetInstructionValueSet(xla_while->while_body()->root_instruction()), &GetInstructionValueSet(xla_while->operand(0))}; if (ssa_form_) { - UpdatePhiInputs(xla_while, inputs); - return false; + return Phi(xla_while, inputs); } else { return GetInstructionValueSet(xla_while).AssignUnionOf(inputs); } @@ -487,38 +454,7 @@ void HloDataflowAnalysis::UpdateInstructionsAndPropagate( VLOG(3) << "Worklist top: " << instruction->name(); VLOG(3) << ToString(); - // The updating of the instruction value set below in - // UpdateInstructionValueSet does not update HloValue::positions(). To - // perform the positions() update remove all positions in 'instruction' from - // the HloValues in 'instruction's value set prior to the update, then after - // the update add the new positions back in. There is likely a more - // efficient way of doing this. - for (auto& pair : GetInstructionValueSet(instruction)) { - const ShapeIndex& index = pair.first; - HloValueSet& value_set = pair.second; - for (const HloValue* value : value_set.values()) { - if (value->defining_instruction() != instruction) { - // Use GetValue for a non-const HloValue reference. - GetValue(value->id()).RemovePosition(instruction, index); - } - } - } - - bool changed = UpdateInstructionValueSet(instruction); - - // Add the positions back in. - for (auto& pair : GetInstructionValueSet(instruction)) { - const ShapeIndex& index = pair.first; - HloValueSet& value_set = pair.second; - for (const HloValue* value : value_set.values()) { - if (value->defining_instruction() != instruction) { - // Use GetValue for a non-const HloValue reference. - GetValue(value->id()).AddPosition(instruction, index); - } - } - } - - if (!changed) { + if (!UpdateInstructionValueSet(instruction)) { // No change to the instruction's value set. VLOG(4) << "No change."; continue; @@ -531,12 +467,16 @@ void HloDataflowAnalysis::UpdateInstructionsAndPropagate( for (HloInstruction* user : instruction->users()) { worklist.push(user); - // If user calls a computation, then the respective parameter(s) of the - // computation need to be updated. + // If user sequentially calls a computation, then the respective + // parameter(s) of the computation need to be updated. for (HloComputation* called_computation : user->called_computations()) { - for (int64 operand_number : user->OperandIndices(instruction)) { - worklist.push( - called_computation->parameter_instruction(operand_number)); + const CallGraphNode& call_graph_node = + call_graph_->GetNode(called_computation); + if (call_graph_node.context() == CallContext::kSequential) { + for (int64 operand_number : user->OperandIndices(instruction)) { + worklist.push( + called_computation->parameter_instruction(operand_number)); + } } } } @@ -574,25 +514,10 @@ InstructionValueSet& HloDataflowAnalysis::GetInstructionValueSet( } Status HloDataflowAnalysis::InitializeInstructionValueSets() { - // Gather the values to create before creating them. This is done because we - // want to allocate the vector of values only once so references to elements - // are stable. - struct ValueToCreate { - HloInstruction* instruction; - ShapeIndex index; - bool is_phi; - }; - std::vector values_to_create; - for (const std::unique_ptr& computation : module_->computations()) { const CallGraphNode& call_graph_node = call_graph_->GetNode(computation.get()); - bool called_from_while = std::any_of( - call_graph_node.caller_callsites().begin(), - call_graph_node.caller_callsites().end(), [](const CallSite& cs) { - return cs.instruction()->opcode() == HloOpcode::kWhile; - }); for (const std::unique_ptr& instruction : computation->instructions()) { @@ -603,20 +528,22 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { // Lambda to set the value set to define all values in the output of the // instruction. - auto define_all_values = [this, &instruction, - &values_to_create](bool is_phi = false) { + auto define_all_values = [this, &instruction](bool is_phi = false) { for (auto& pair : GetInstructionValueSet(instruction.get())) { const ShapeIndex& index = pair.first; - values_to_create.push_back({instruction.get(), index, is_phi}); + HloValue* value = + NewHloValue(instruction.get(), index, /*is_phi=*/false); + GetValueSet(instruction.get(), index).AddValue(value); } }; // Lambda to set the value set to define only the top-level buffer in the // output of the instruction. Any other values flow from the operands of // the instruction (or from cross-computation dataflow). - auto define_top_level_only = [this, &instruction, &values_to_create]() { - values_to_create.push_back( - {instruction.get(), /*index=*/{}, /*is_phi=*/false}); + auto define_top_level_only = [this, &instruction]() { + HloValue* value = + NewHloValue(instruction.get(), /*index=*/{}, /*is_phi=*/false); + GetValueSet(instruction.get(), /*index=*/{}).AddValue(value); }; switch (instruction->opcode()) { @@ -626,10 +553,6 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { } break; case HloOpcode::kWhile: - if (ssa_form_) { - define_all_values(/*is_phi=*/true); - } - break; case HloOpcode::kCall: case HloOpcode::kGetTupleElement: // These instructions define no values. The values in their output @@ -654,10 +577,6 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { // values in their output. Otherwise the values of the parameter // come from the caller (eg, operands to the kCall instruction). define_all_values(); - } else if (call_graph_node.context() == CallContext::kSequential && - called_from_while && ssa_form_) { - // Parameters of while bodies and conditions are phis. - define_all_values(/*is_phi=*/true); } break; case HloOpcode::kCopy: @@ -674,164 +593,9 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { } } - // Reserve the vector ahead of time so references to elements are stable. - values_.reserve(values_to_create.size()); - for (int64 i = 0; i < values_to_create.size(); ++i) { - const ValueToCreate& to_create = values_to_create[i]; - values_.emplace_back(/*id=*/i, to_create.instruction, to_create.index, - to_create.is_phi); - const HloValue& value = values_.back(); - GetValueSet(to_create.instruction, to_create.index).AddValue(&value); - if (value.is_phi()) { - phi_inputs_[&value] = {}; - } - } return Status::OK(); } -bool HloDataflowAnalysis::IsDefinedBefore(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const { - // If 'b' is an entry param then 'a' cannot be defined before 'b' because 'b' - // is live into the module. - if (b.defining_instruction()->parent() == module_->entry_computation() && - b.defining_instruction()->opcode() == HloOpcode::kParameter) { - return false; - } - - // Phi values require special handling. Because XLA does not have a phi - // instruction, the definition instruction of the phis values are - // placeholders: either the subcomputation parameter (body or condition) or - // the while instruction. However, the program point where these values are - // logically defined does not necessarily coincide exactly with program point - // of these place-holder instructions. So we explicitly define the following - // order for phi values: - // - // body/condition parameter phi: - // Defined before all values defined in its computation excepting other - // phis. - // - // while phi: - // defined after all values defined in the condition or body. - // - auto is_body_or_condition_phi = [](const HloValue& v) { - return v.is_phi() && - v.defining_instruction()->opcode() == HloOpcode::kParameter; - }; - if (is_body_or_condition_phi(a) && !is_body_or_condition_phi(b) && - call_graph_->InstructionIsNestedIn(b.defining_instruction(), - a.defining_instruction()->parent())) { - return true; - } - if (is_body_or_condition_phi(b) && - call_graph_->InstructionIsNestedIn(a.defining_instruction(), - b.defining_instruction()->parent())) { - return false; - } - - // If 'b' is a while phi and 'a' is in the body or condition, then 'a' - // executes before 'b'. - if (b.is_phi() && b.defining_instruction()->opcode() == HloOpcode::kWhile && - (call_graph_->InstructionIsNestedIn( - a.defining_instruction(), b.defining_instruction()->while_body()) || - call_graph_->InstructionIsNestedIn( - a.defining_instruction(), - b.defining_instruction()->while_condition()))) { - return true; - } - - return ordering.ExecutesBefore(a.defining_instruction(), - b.defining_instruction()); -} - -bool HloDataflowAnalysis::UseIsBeforeValueDefinition( - const HloUse& use, const HloValue& value, - const HloOrdering& ordering) const { - if (ordering.ExecutesBefore(use.instruction, value.defining_instruction())) { - return true; - } - - // If the use is at the instruction where the value is defined, then the use - // is before the def if the instruction allows buffer sharing (in place - // computation). - if (use.instruction == value.defining_instruction() && - CanShareOperandBufferWithUser( - use.instruction->mutable_operand(use.operand_number), - use.operand_index, value.defining_instruction(), - value.defining_index())) { - return true; - } - - // The use at a while is an input to a phi, and logically occurs before values - // are defined in the body or condition computations. - if (use.instruction->opcode() == HloOpcode::kWhile) { - const HloInstruction* xla_while = use.instruction; - if (call_graph_->InstructionIsNestedIn(value.defining_instruction(), - xla_while->while_body()) || - call_graph_->InstructionIsNestedIn(value.defining_instruction(), - xla_while->while_condition())) { - return true; - } - } - - // Similarly if the value is defined at a while, it logically occurs after any - // uses in the body or condition computations. - if (value.defining_instruction()->opcode() == HloOpcode::kWhile) { - CHECK(ssa_form_); - const HloInstruction* xla_while = value.defining_instruction(); - if (call_graph_->InstructionIsNestedIn(use.instruction, - xla_while->while_body()) || - call_graph_->InstructionIsNestedIn(use.instruction, - xla_while->while_condition())) { - return true; - } - } - return false; -} - -bool HloDataflowAnalysis::LiveRangeStrictlyBefore( - const HloValue& a, const HloValue& b, const HloOrdering& ordering) const { - VLOG(4) << "LiveRangeStrictlyBefore(a = " << a.ToShortString() - << ", b = " << b.ToShortString() << ")"; - if (!IsDefinedBefore(a, b, ordering)) { - VLOG(4) << "a not defined before b"; - return false; - } - - // Live-out values from the module can never have ranges strictly before any - // other value. - if (a.live_out_of_module()) { - VLOG(4) << "a is live out of module"; - return false; - } - - // Live-out values of computations can never have ranges strictly before any - // other value in the computation (including values nested in - // subcomputations). - if (a.live_out_of_computation() && - call_graph_->InstructionIsNestedIn(b.defining_instruction(), - a.defining_instruction()->parent())) { - VLOG(4) << "a is live out of computation containing b"; - return false; - } - - // All uses of 'a' must be before 'b' is defined. - for (const HloUse& use : a.uses()) { - if (!UseIsBeforeValueDefinition(use, b, ordering)) { - VLOG(4) << "use of a (" << use << ") not before b is defined"; - return false; - } - } - - return true; -} - -bool HloDataflowAnalysis::MayInterfere(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const { - // Buffers without disjoint liveness may interfere. - return !LiveRangeStrictlyBefore(a, b, ordering) && - !LiveRangeStrictlyBefore(b, a, ordering); -} - /* static */ StatusOr> HloDataflowAnalysis::Run( HloModule* module, bool ssa_form, bool bitcast_defines_value) { @@ -855,6 +619,33 @@ StatusOr> HloDataflowAnalysis::Run( } dataflow_analysis->UpdateInstructionsAndPropagate(all_instructions); + // Add in positions to all values. + for (const std::unique_ptr& computation : + module->computations()) { + for (const std::unique_ptr& instruction : + computation->instructions()) { + for (const auto& pair : + dataflow_analysis->GetInstructionValueSet(instruction.get())) { + const ShapeIndex& index = pair.first; + const HloValueSet& value_set = pair.second; + for (const HloValue* value : value_set.values()) { + if (value->defining_instruction() != instruction.get()) { + dataflow_analysis->GetValue(value->id()) + .AddPosition(instruction.get(), index); + } + } + } + } + } + + // Construct vector of values. + dataflow_analysis->values_vector_.reserve(dataflow_analysis->values_.size()); + for (auto& pair : dataflow_analysis->values_) { + dataflow_analysis->values_vector_.push_back(&pair.second); + } + std::sort(dataflow_analysis->values_vector_.begin(), + dataflow_analysis->values_vector_.end(), HloValue::IdLessThan); + TF_DCHECK_OK(dataflow_analysis->Verify()); XLA_VLOG_LINES(1, dataflow_analysis->ToString()); @@ -865,14 +656,14 @@ StatusOr> HloDataflowAnalysis::Run( Status HloDataflowAnalysis::Verify() const { // Verify each HloValue appears in the value sets that the value's positions() // indicate. - for (const HloValue& value : values()) { - for (const HloPosition& position : value.positions()) { + for (const HloValue* value : values()) { + for (const HloPosition& position : value->positions()) { const HloValueSet& value_set = GetValueSet(position); TF_RET_CHECK(std::find(value_set.values().begin(), value_set.values().end(), - &value) != value_set.values().end()) + value) != value_set.values().end()) << "Value set at position " << position << " does not contain value " - << value.ToShortString(); + << value->ToShortString(); } } @@ -898,75 +689,4 @@ Status HloDataflowAnalysis::Verify() const { return Status::OK(); } -Status HloDataflowAnalysis::VerifyAgainstReference() const { - TF_RETURN_IF_ERROR(Verify()); - - TF_ASSIGN_OR_RETURN(std::unique_ptr reference, - Run(module_, ssa_form_, bitcast_defines_value_)); - TF_RETURN_IF_ERROR(reference->Verify()); - - VLOG(2) << "This analysis:"; - XLA_VLOG_LINES(2, ToString()); - VLOG(2) << "Reference:"; - XLA_VLOG_LINES(2, reference->ToString()); - - // Verify value sets in each position are identical. - for (const auto& computation : module_->computations()) { - for (const auto& instruction : computation->instructions()) { - for (const auto& pair : GetInstructionValueSet(instruction.get())) { - const ShapeIndex& index = pair.first; - const HloValueSet& value_set = pair.second; - const HloValueSet& reference_value_set = - reference->GetValueSet(instruction.get(), index); - - auto value_in_set = [](const HloValue& v, const HloValueSet& vset) { - return std::find_if(vset.values().begin(), vset.values().end(), - [&v](const HloValue* w) { return *w == v; }) != - vset.values().end(); - }; - - for (const HloValue* value : value_set.values()) { - TF_RET_CHECK(value_in_set(*value, reference_value_set)) - << "Value " << value->ToShortString() - << " does not exist in reference"; - } - for (const HloValue* reference_value : reference_value_set.values()) { - TF_RET_CHECK(value_in_set(*reference_value, value_set)) - << "Value " << reference_value->ToShortString() - << " only exists in reference"; - } - } - } - } - - // Verify all phis resolve identically and uses are identical. - for (const HloValue& value : values()) { - const HloValue& reference_value = reference->GetValueDefinedAt( - value.defining_instruction(), value.defining_index()); - TF_RET_CHECK(value.is_phi() == reference_value.is_phi()); - if (value.is_phi()) { - const HloValue* resolved_value = ResolvePhi(value); - const HloValue* reference_resolved_value = - reference->ResolvePhi(reference_value); - if (resolved_value == nullptr) { - TF_RET_CHECK(reference_resolved_value == nullptr); - } else { - TF_RET_CHECK(reference_resolved_value != nullptr); - TF_RET_CHECK(*reference_resolved_value == *resolved_value); - } - } - - for (const HloUse& use : value.uses()) { - TF_RET_CHECK(std::find(reference_value.uses().begin(), - reference_value.uses().end(), - use) != reference_value.uses().end()); - } - for (const HloUse& reference_use : reference_value.uses()) { - TF_RET_CHECK(std::find(value.uses().begin(), value.uses().end(), - reference_use) != value.uses().end()); - } - } - return Status::OK(); -} - } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h index 7781cc58a3..aae257dd09 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h @@ -88,10 +88,10 @@ class HloDataflowAnalysis { // given position. const HloValueSet& GetValueSet(const HloInstruction* instruction, const ShapeIndex& index = {}) const; - HloValueSet& GetValueSet(const HloInstruction* instruction, - const ShapeIndex& index = {}); const HloValueSet& GetValueSet(const HloPosition& position) const; HloValueSet& GetValueSet(const HloPosition& position); + HloValueSet& GetValueSet(const HloInstruction* instruction, + const ShapeIndex& index = {}); // Return the unique value in the HloValueSet at the given instruction and // shape index. CHECKs if the value set does not contain a exactly one value. @@ -108,49 +108,11 @@ class HloDataflowAnalysis { const HloValue& GetValue(HloValue::Id value_id) const; HloValue& GetValue(HloValue::Id value_id); - // Returns whether the given values interfere assuming the given HLO - // ordering. Two values interfere if they may both be simultaneously live. - bool MayInterfere(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const; - - // Overload which takes HloValue:Ids. - bool MayInterfere(HloValue::Id a, HloValue::Id b, - const HloOrdering& ordering) const { - return MayInterfere(GetValue(a), GetValue(b), ordering); - } - // Return the total number of HloValues. int64 value_count() const { return values_.size(); } - // Return a vector of all HloValues. - const std::vector& values() const { return values_; } - - // Updates the dataflow after the changing an operand of - // 'instruction'. Dataflow update is not possible if instructions have been - // added or removed from the graph. - void UpdateAfterChangingOperand(HloInstruction* instruction, - HloInstruction* old_operand, - HloInstruction* new_operand); - - // Updates the dataflow after the changing the root of a computation from - // 'old_root' to 'new_root'. - void UpdateAfterChangingRoot(HloInstruction* old_root, - HloInstruction* new_root); - - // Returns the non-phi HloValue that is the unique (transitive) input to the - // given phi. If no such HloValue exists (there are multiple inputs to the - // phi) then nullptr is returned. This is computed by all walking the inputs - // of the given phi value until non-phi HloValue(s) are encountered. - const HloValue* ResolvePhi(const HloValue& phi) const; - const HloValue* ResolvePhi(const HloInstruction* instruction, - const ShapeIndex& index = {}) const { - return ResolvePhi(GetValueDefinedAt(instruction, index)); - } - - // Compare the dataflow analysis against a clean recomputation of the - // analysis. Returns an error status if there is a mismatch. Useful for - // verifying the correctness after updates to the analysis. - Status VerifyAgainstReference() const; + // Return a vector of all HloValues stabily sorted by HloValue::Id. + const std::vector& values() const { return values_vector_; } // Return the call graph used for computing the dataflow. const CallGraph& call_graph() const { return *call_graph_; } @@ -161,6 +123,13 @@ class HloDataflowAnalysis { HloDataflowAnalysis(HloModule* module, bool ssa_form, bool bitcast_defines_value = false); + // Returns a new HloValue defined at the given instruction and shape index. + HloValue* NewHloValue(HloInstruction* instruction, const ShapeIndex& index, + bool is_phi = false); + + // Delete the HloValue with the given ID. + void DeleteHloValue(HloValue::Id value_id); + // Constructs and initializes the InstructionValueSets of all instructions to // contain exactly the HloValues defined by each instruction. These values can // then propagated throughout the HLO graph by calling @@ -187,10 +156,11 @@ class HloDataflowAnalysis { void UpdateInstructionsAndPropagate( tensorflow::gtl::ArraySlice instructions); - // Sets the inputs of the given phi to given value(s). - void UpdatePhiInputs( - const HloInstruction* instruction, - tensorflow::gtl::ArraySlice inputs); + // Return the result of the SSA Phi function applied to the given inputs at + // the given instruction. If skip_top_level is true, then the top level of the + // value set of 'instruction' is not modified. + bool Phi(HloInstruction* instruction, + tensorflow::gtl::ArraySlice inputs); // Updates the positions of the HloValues in the output of the given // instruction. This should be called after the instruction value set of @@ -203,20 +173,6 @@ class HloDataflowAnalysis { HloInstruction* instruction, const InstructionValueSet& new_value_set, const InstructionValueSet* prev_value_set = nullptr); - // Returns true if the live range of the given value 'a' is strictly before - // the live range of value 'b' using the given HLO ordering. - bool LiveRangeStrictlyBefore(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const; - - // Returns whether the value 'a' is defined before the value 'b' under the - // given ordering. - bool IsDefinedBefore(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const; - - // Returns whether the given use is before the given value definition. - bool UseIsBeforeValueDefinition(const HloUse& use, const HloValue& value, - const HloOrdering& ordering) const; - // Verify various invariants of the dataflow analysis. Status Verify() const; @@ -226,19 +182,19 @@ class HloDataflowAnalysis { std::unique_ptr call_graph_; - // Array of all values in the module. This is allocated once at analysis - // construction time so HloValue references are stable. Updates to the - // analysis via UpdateAfterChangingOperand and UpdateAfterChangingRoot do not - // result in the creation or destruction of any HloValues. - std::vector values_; - - // Map hold the inputs to each phi value in the module. Used by ResolvePhi. - tensorflow::gtl::FlatMap> - phi_inputs_; + // The map of all HloValues in the module. We pass around pointers to the + // mapped HloValues, so the underlying container must keep them valid despite + // mutations touching other map entries. + std::unordered_map values_; // A map from instruction to InstructionValueSet. std::unordered_map value_sets_; + + // A vector containing all HloValues sorted by HloValue::Id. + std::vector values_vector_; + + // The Id to use for the next HloValue. + HloValue::Id next_value_id_ = 0; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 9f3dd539ef..ef0fa1d745 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -26,7 +26,6 @@ limitations under the License. #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" @@ -44,8 +43,8 @@ class HloDataflowAnalysisTest : public HloTestBase, // Run dataflow analysis on the member module. For convenience returns a // reference to the generated analysis stored in analysis_. - HloDataflowAnalysis& RunAnalysis(bool ssa_form, - bool bitcast_defines_value = false) { + const HloDataflowAnalysis& RunAnalysis(bool ssa_form, + bool bitcast_defines_value = false) { analysis_ = HloDataflowAnalysis::Run(module_.get(), ssa_form, bitcast_defines_value) .ConsumeValueOrDie(); @@ -71,8 +70,8 @@ class HloDataflowAnalysisTest : public HloTestBase, const HloInstruction* b) { EXPECT_FALSE(ShapeUtil::IsTuple(a->shape())); EXPECT_FALSE(ShapeUtil::IsTuple(b->shape())); - return analysis_->MayInterfere(analysis_->GetValueDefinedAt(a), - analysis_->GetValueDefinedAt(b), ordering); + return ordering.MayInterfere(analysis_->GetValueDefinedAt(a), + analysis_->GetValueDefinedAt(b)); } std::unique_ptr module_; @@ -499,37 +498,26 @@ TEST_P(HloDataflowAnalysisTest, SingleWhile) { EXPECT_FALSE(analysis.GetValueDefinedAt(cond_constant).live_out_of_module()); if (ssa_form) { - // While instruction should define phi values. The value at index {0} is a - // degenerate phi with a single input 'constant1'. - EXPECT_TRUE(analysis.ValueIsDefinedAt(xla_while, /*index=*/{0})); - EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while, /*index=*/{0}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(xla_while, /*index=*/{0}), - &analysis.GetValueDefinedAt(constant1)); - EXPECT_TRUE(analysis.ValueIsDefinedAt(body_param, /*index=*/{0})); - EXPECT_TRUE(analysis.GetValueDefinedAt(body_param, /*index=*/{0}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(body_param, /*index=*/{0}), - &analysis.GetValueDefinedAt(constant1)); - EXPECT_TRUE(analysis.ValueIsDefinedAt(cond_param, /*index=*/{0})); - EXPECT_TRUE(analysis.GetValueDefinedAt(cond_param, /*index=*/{0}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(cond_param, /*index=*/{0}), - &analysis.GetValueDefinedAt(constant1)); + // Element 0 of the tuple passed through the body so no phi value is + // defined. + EXPECT_FALSE(analysis.ValueIsDefinedAt(xla_while, /*index=*/{0})); + EXPECT_FALSE(analysis.ValueIsDefinedAt(body_param, /*index=*/{0})); + EXPECT_FALSE(analysis.ValueIsDefinedAt(cond_param, /*index=*/{0})); + // Element 1 of the tuple should be a phi value. EXPECT_TRUE(analysis.ValueIsDefinedAt(xla_while, /*index=*/{1})); EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while, /*index=*/{1}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(xla_while, /*index=*/{1}), nullptr); EXPECT_TRUE(analysis.ValueIsDefinedAt(body_param, /*index=*/{1})); EXPECT_TRUE(analysis.GetValueDefinedAt(body_param, /*index=*/{1}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(body_param, /*index=*/{1}), nullptr); EXPECT_TRUE(analysis.ValueIsDefinedAt(cond_param, /*index=*/{1})); EXPECT_TRUE(analysis.GetValueDefinedAt(cond_param, /*index=*/{1}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(cond_param, /*index=*/{1}), nullptr); - EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(), - UnorderedElementsAre(HloUse{xla_while, 0, {0}})); + EXPECT_THAT( + analysis.GetValueDefinedAt(constant1).uses(), + UnorderedElementsAre(HloUse{add, 0, {}}, HloUse{xla_while, 0, {0}})); - EXPECT_FALSE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while, /*index=*/{0}) - .live_out_of_module()); + // Constant1 passes through the body and out of the module. + EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while, /*index=*/{1}) .live_out_of_module()); @@ -613,20 +601,15 @@ TEST_P(HloDataflowAnalysisTest, SequentialWhiles) { bool ssa_form = GetParam(); const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); - if (ssa_form) { - EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while2).live_out_of_module()); - EXPECT_FALSE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); - } else { - // Element 0 is passed through all the while instructions and out of the - // module. - EXPECT_EQ(analysis.GetUniqueValueAt(xla_while0, /*index=*/{0}), - analysis.GetValueDefinedAt(constant1)); - EXPECT_EQ(analysis.GetUniqueValueAt(xla_while1, /*index=*/{0}), - analysis.GetValueDefinedAt(constant1)); - EXPECT_EQ(analysis.GetUniqueValueAt(xla_while2, /*index=*/{0}), - analysis.GetValueDefinedAt(constant1)); - EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); - } + // Element 0 is passed through all the while instructions and out of the + // module.. + EXPECT_EQ(analysis.GetUniqueValueAt(xla_while0, /*index=*/{0}), + analysis.GetValueDefinedAt(constant1)); + EXPECT_EQ(analysis.GetUniqueValueAt(xla_while1, /*index=*/{0}), + analysis.GetValueDefinedAt(constant1)); + EXPECT_EQ(analysis.GetUniqueValueAt(xla_while2, /*index=*/{0}), + analysis.GetValueDefinedAt(constant1)); + EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); } TEST_P(HloDataflowAnalysisTest, NestedWhiles) { @@ -705,13 +688,18 @@ TEST_P(HloDataflowAnalysisTest, NestedWhiles) { bool ssa_form = GetParam(); const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); + EXPECT_THAT(HloValuesAt(inner_param, /*index=*/{0}), + UnorderedElementsAre(analysis.GetValueDefinedAt(negate))); if (ssa_form) { EXPECT_TRUE(analysis.ValueIsDefinedAt(inner_param, /*index=*/{1})); EXPECT_TRUE( analysis.GetValueDefinedAt(inner_param, /*index=*/{1}).is_phi()); - EXPECT_TRUE(analysis.ValueIsDefinedAt(nested_while, /*index=*/{0})); - EXPECT_TRUE( - analysis.GetValueDefinedAt(inner_param, /*index=*/{1}).is_phi()); + + // Element 0 of the nested while is %negate. + EXPECT_FALSE(analysis.ValueIsDefinedAt(nested_while, /*index=*/{0})); + EXPECT_THAT(HloValuesAt(inner_param, /*index=*/{0}), + UnorderedElementsAre(analysis.GetValueDefinedAt(negate))); + // Element 1 is a phi value (join of %add and %constant2). EXPECT_TRUE(analysis.ValueIsDefinedAt(nested_while, /*index=*/{1})); EXPECT_TRUE( analysis.GetValueDefinedAt(nested_while, /*index=*/{1}).is_phi()); @@ -724,8 +712,6 @@ TEST_P(HloDataflowAnalysisTest, NestedWhiles) { EXPECT_TRUE( analysis.GetValueDefinedAt(entry_while, /*index=*/{1}).is_phi()); } else { - EXPECT_THAT(HloValuesAt(inner_param, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(negate))); EXPECT_THAT(HloValuesAt(inner_param, /*index=*/{1}), UnorderedElementsAre(analysis.GetValueDefinedAt(add), analysis.GetValueDefinedAt(constant2))); @@ -1496,256 +1482,6 @@ TEST_P(HloDataflowAnalysisTest, EmbeddedComputationInterference) { EXPECT_TRUE(InstructionsMayInterfere(ordering, negate, embedded_log)); } -TEST_P(HloDataflowAnalysisTest, UpdateAnalysisForWhile) { - // Test updating dataflow after modifying a module with an array shaped while: - // - // body(F32[] %param): - // %negate = Negate(%param) - // - // condition(F32[] %param): - // return Constant(false) - // - // entry: - // %constant = Constant(1.0) - // %exp = Exp(%constant) - // return While(%exp, body, condition) - // - auto body_builder = HloComputation::Builder("body"); - auto body_param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); - auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary( - scalar_shape_, HloOpcode::kNegate, body_param)); - HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build()); - - // Condition computation trivially returns a constant "false". - auto cond_builder = HloComputation::Builder("condition"); - auto cond_param = cond_builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); - cond_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(false))); - HloComputation* condition = - module_->AddEmbeddedComputation(cond_builder.Build()); - - auto builder = HloComputation::Builder(TestName()); - auto constant = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1.0))); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kExp, constant)); - auto xla_while = builder.AddInstruction( - HloInstruction::CreateWhile(scalar_shape_, condition, body, exp)); - module_->AddEntryComputation(builder.Build()); - - bool ssa_form = GetParam(); - HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); - - // Sanity check the initial dataflow analysis before transforming the HLO - // graph. - if (ssa_form) { - EXPECT_TRUE(analysis.ValueIsDefinedAt(body_param)); - EXPECT_TRUE(analysis.GetValueDefinedAt(body_param).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(body_param), nullptr); - - EXPECT_TRUE(analysis.ValueIsDefinedAt(cond_param)); - EXPECT_TRUE(analysis.GetValueDefinedAt(cond_param).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(cond_param), nullptr); - - EXPECT_FALSE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - EXPECT_FALSE(analysis.GetValueDefinedAt(negate).live_out_of_module()); - } else { - EXPECT_THAT(HloValuesAt(body_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp), - analysis.GetValueDefinedAt(negate))); - EXPECT_THAT(HloValuesAt(cond_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp), - analysis.GetValueDefinedAt(negate))); - EXPECT_THAT(HloValuesAt(xla_while), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp), - analysis.GetValueDefinedAt(negate))); - - EXPECT_TRUE(analysis.GetValueDefinedAt(negate).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - } - - // Set the body root to the body_param. Previously it was Negate(body_param). - body->set_root_instruction(body_param); - - // Prior to updating, verify that the dataflow analysis is no longer valid. - Status verify_status = analysis.VerifyAgainstReference(); - EXPECT_FALSE(verify_status.ok()); - - analysis.UpdateAfterChangingRoot(/*old_root=*/negate, - /*new_root=*/body_param); - - // Analysis should be valid after the update. - TF_EXPECT_OK(analysis.VerifyAgainstReference()); - - if (ssa_form) { - // The phis should now be resolvable as 'exp' is passed through the body - // transparently. - EXPECT_EQ(analysis.ResolvePhi(body_param), - &analysis.GetValueDefinedAt(exp)); - EXPECT_EQ(analysis.ResolvePhi(cond_param), - &analysis.GetValueDefinedAt(exp)); - EXPECT_EQ(analysis.ResolvePhi(xla_while), &analysis.GetValueDefinedAt(exp)); - EXPECT_FALSE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - } else { - EXPECT_THAT(HloValuesAt(body_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp))); - EXPECT_THAT(HloValuesAt(cond_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp))); - EXPECT_THAT(HloValuesAt(xla_while), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp))); - EXPECT_TRUE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - } - EXPECT_FALSE(analysis.GetValueDefinedAt(negate).live_out_of_module()); - - // Now replace the operand of the while with %constant (was %exp). - TF_ASSERT_OK(exp->ReplaceUseWith(xla_while, constant)); - analysis.UpdateAfterChangingOperand(xla_while, /*old_operand=*/exp, - /*new_operand=*/constant); - - // Verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - if (ssa_form) { - // The phis now resolve to 'constant'. - EXPECT_EQ(analysis.ResolvePhi(body_param), - &analysis.GetValueDefinedAt(constant)); - EXPECT_EQ(analysis.ResolvePhi(cond_param), - &analysis.GetValueDefinedAt(constant)); - EXPECT_EQ(analysis.ResolvePhi(xla_while), - &analysis.GetValueDefinedAt(constant)); - } else { - EXPECT_THAT(HloValuesAt(body_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant))); - EXPECT_THAT(HloValuesAt(cond_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant))); - EXPECT_THAT(HloValuesAt(xla_while), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant))); - EXPECT_TRUE(analysis.GetValueDefinedAt(constant).live_out_of_module()); - } - - // And finally make the negate the root of the body again. - body->set_root_instruction(negate); - analysis.UpdateAfterChangingRoot(/*old_root=*/body_param, - /*new_root=*/negate); - - // Verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - if (ssa_form) { - // Phis should no longer be resolvable. - EXPECT_EQ(analysis.ResolvePhi(body_param), nullptr); - EXPECT_EQ(analysis.ResolvePhi(cond_param), nullptr); - EXPECT_EQ(analysis.ResolvePhi(xla_while), nullptr); - } else { - EXPECT_THAT(HloValuesAt(body_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant), - analysis.GetValueDefinedAt(negate))); - EXPECT_THAT(HloValuesAt(cond_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant), - analysis.GetValueDefinedAt(negate))); - EXPECT_THAT(HloValuesAt(xla_while), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant), - analysis.GetValueDefinedAt(negate))); - - EXPECT_FALSE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(negate).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(constant).live_out_of_module()); - } - - // After the updates, verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); -} - -TEST_P(HloDataflowAnalysisTest, UpdateOfATupleSelect) { - // Test changing the operands of kSelects of a tuple value and updating the - // dataflow. - auto builder = HloComputation::Builder(TestName()); - auto pred = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(false))); - auto a = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1.0))); - auto b = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(2.0))); - auto c = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(3.0))); - auto d = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(4.0))); - auto tuple_a = builder.AddInstruction(HloInstruction::CreateTuple({a})); - auto tuple_b = builder.AddInstruction(HloInstruction::CreateTuple({b})); - auto tuple_c = builder.AddInstruction(HloInstruction::CreateTuple({c})); - auto tuple_d = builder.AddInstruction(HloInstruction::CreateTuple({d})); - const Shape tuple_shape = tuple_a->shape(); - auto select_aa = builder.AddInstruction(HloInstruction::CreateTernary( - tuple_shape, HloOpcode::kSelect, pred, tuple_a, tuple_a)); - auto select_ab = builder.AddInstruction(HloInstruction::CreateTernary( - tuple_shape, HloOpcode::kSelect, pred, tuple_a, tuple_b)); - auto select_cd = builder.AddInstruction(HloInstruction::CreateTernary( - tuple_shape, HloOpcode::kSelect, pred, tuple_c, tuple_d)); - auto select_abcd = builder.AddInstruction(HloInstruction::CreateTernary( - tuple_shape, HloOpcode::kSelect, pred, select_ab, select_cd)); - - module_->AddEntryComputation(builder.Build()); - - bool ssa_form = GetParam(); - HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); - - // Sanity check dataflow before changing the graph and updating. - EXPECT_THAT(HloValuesAt(select_aa, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a))); - EXPECT_THAT(HloValuesAt(select_ab, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(b))); - EXPECT_THAT(HloValuesAt(select_cd, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(c), - analysis.GetValueDefinedAt(d))); - EXPECT_THAT(HloValuesAt(select_abcd, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(b), - analysis.GetValueDefinedAt(c), - analysis.GetValueDefinedAt(d))); - EXPECT_TRUE(analysis.GetValueDefinedAt(a).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(b).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(c).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(d).live_out_of_module()); - - // Set the rhs of 'select_aa' to be 'd'. - TF_ASSERT_OK(select_aa->ReplaceOperandWith(2, tuple_d)); - analysis.UpdateAfterChangingOperand(select_aa, /*old_operand=*/tuple_a, - /*new_operand=*/tuple_d); - - // Verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - EXPECT_THAT(HloValuesAt(select_aa, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(d))); - - // Set the lhs of 'select_cd' to be 'a'. - TF_ASSERT_OK(select_cd->ReplaceOperandWith(1, tuple_a)); - analysis.UpdateAfterChangingOperand(select_cd, /*old_operand=*/tuple_c, - /*new_operand=*/tuple_a); - - // Verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - EXPECT_THAT(HloValuesAt(select_cd, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(d))); - EXPECT_THAT(HloValuesAt(select_abcd, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(b), - analysis.GetValueDefinedAt(d))); - EXPECT_TRUE(analysis.GetValueDefinedAt(a).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(b).live_out_of_module()); - EXPECT_FALSE(analysis.GetValueDefinedAt(c).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(d).live_out_of_module()); - - // After the updates, verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); -} - INSTANTIATE_TEST_CASE_P(HloDataflowAnalysisInstantiation, HloDataflowAnalysisTest, ::testing::Values(false, true)); diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc index ad6070a9c1..c95e44bd5d 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_scheduling.h" @@ -218,6 +219,94 @@ TEST_F(HloOrderingTest, InstructionsInWhileComputations) { EXPECT_FALSE(ordering.ExecutesBefore(body_param, cond_param)); } +TEST_F(HloOrderingTest, ValuesInWhileComputations) { + // Tests the ordering of values (defined by dataflow analysis) in the body and + // condition of a while instruction. HLO code: + // + // body(F32[]) %param): + // %negate = Negate(%param) + // + // condition(F32[] %param): + // %convert = Convert(%param) + // + // entry: + // %constant = Constant(1.0) + // %while = While(%constant, body, condition) + // %add = Add(%constant, %while) + // + auto module = CreateNewModule(); + const Shape scalar_shape = ShapeUtil::MakeShape(xla::F32, {}); + + auto body_builder = HloComputation::Builder("body"); + auto body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "body_param")); + auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape, HloOpcode::kNegate, body_param)); + HloComputation* body = module->AddEmbeddedComputation(body_builder.Build()); + + auto cond_builder = HloComputation::Builder("condition"); + auto cond_param = cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "cond_param")); + auto convert = cond_builder.AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::MakeShape(xla::PRED, {}), cond_param)); + HloComputation* condition = + module->AddEmbeddedComputation(cond_builder.Build()); + + auto builder = HloComputation::Builder(TestName()); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(scalar_shape, condition, body, constant)); + auto add = builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kAdd, constant, xla_while)); + module->AddEntryComputation(builder.Build()); + + TF_ASSERT_OK_AND_ASSIGN( + auto dataflow, HloDataflowAnalysis::Run(module.get(), /*ssa_form=*/true)); + DependencyHloOrdering ordering(module.get()); + + // Init value is defined before the while, but live range is not before the + // while because of the use of the init value in the add. + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(constant), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_FALSE( + ordering.LiveRangeStrictlyBefore(dataflow->GetValueDefinedAt(constant), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_TRUE(ordering.MayInterfere(dataflow->GetValueDefinedAt(constant), + dataflow->GetValueDefinedAt(xla_while))); + + // Any value defined in the body or condition is defined before the while, and + // has a live range strictly before the while. + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(negate), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_TRUE( + ordering.LiveRangeStrictlyBefore(dataflow->GetValueDefinedAt(negate), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_FALSE(ordering.MayInterfere(dataflow->GetValueDefinedAt(negate), + dataflow->GetValueDefinedAt(xla_while))); + + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(convert), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_TRUE( + ordering.LiveRangeStrictlyBefore(dataflow->GetValueDefinedAt(convert), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_FALSE(ordering.MayInterfere(dataflow->GetValueDefinedAt(convert), + dataflow->GetValueDefinedAt(xla_while))); + + // The live range of the while should be before the add. + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(xla_while), + dataflow->GetValueDefinedAt(add))); + ASSERT_EQ(dataflow->GetValueDefinedAt(xla_while).uses().size(), 1); + + const HloUse& while_use = dataflow->GetValueDefinedAt(xla_while).uses()[0]; + EXPECT_EQ(while_use.instruction, add); + EXPECT_TRUE(ordering.UseIsBeforeValueDefinition( + while_use, dataflow->GetValueDefinedAt(add))); + EXPECT_TRUE( + ordering.LiveRangeStrictlyBefore(dataflow->GetValueDefinedAt(xla_while), + dataflow->GetValueDefinedAt(add))); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc index f85d8ec50d..e6cf0d37b8 100644 --- a/tensorflow/compiler/xla/service/hlo_value.cc +++ b/tensorflow/compiler/xla/service/hlo_value.cc @@ -159,12 +159,6 @@ void HloValue::AddPosition(HloInstruction* instruction, for (const HloPosition& position : positions_) { DCHECK_NE(position, new_position); } - // The shape of the new position must match existing positions. - if (!positions_.empty()) { - CHECK( - ShapeUtil::Compatible(positions_.front().shape(), new_position.shape())) - << "front: " << positions_.front() << " new: " << new_position; - } positions_.push_back(std::move(new_position)); diff --git a/tensorflow/compiler/xla/service/hlo_value.h b/tensorflow/compiler/xla/service/hlo_value.h index 63ecc25020..6872bc76a8 100644 --- a/tensorflow/compiler/xla/service/hlo_value.h +++ b/tensorflow/compiler/xla/service/hlo_value.h @@ -225,6 +225,9 @@ class HloValueSet { // already exist in the set. bool AddValue(const HloValue* value); + // Clear all values from the set. + void Clear() { values_.clear(); } + // Return the unique HLO value in the set. CHECKs if the set does not contain // exactly one value. const HloValue& GetUniqueValue() const { -- GitLab From 829962bed802cc43f000fa4959e945fd24ffcd16 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 09:45:07 -0700 Subject: [PATCH 159/294] Update feature_util to support SequenceExample proto. PiperOrigin-RevId: 167286643 --- tensorflow/core/example/feature_util.cc | 124 +++++++--- tensorflow/core/example/feature_util.h | 228 +++++++++++++------ tensorflow/core/example/feature_util_test.cc | 218 +++++++++++++++++- 3 files changed, 458 insertions(+), 112 deletions(-) diff --git a/tensorflow/core/example/feature_util.cc b/tensorflow/core/example/feature_util.cc index 6f3cc6c6c5..f0593ede82 100644 --- a/tensorflow/core/example/feature_util.cc +++ b/tensorflow/core/example/feature_util.cc @@ -18,77 +18,129 @@ limitations under the License. namespace tensorflow { namespace internal { - -::tensorflow::Feature& ExampleFeature(const string& name, - ::tensorflow::Example* example) { - ::tensorflow::Features* features = example->mutable_features(); - return (*features->mutable_feature())[name]; +Feature& ExampleFeature(const string& name, Example* example) { + return *GetFeature(name, example); } -} // namespace internal +} // namespace internal template <> -bool ExampleHasFeature(const string& name, - const Example& example) { - auto it = example.features().feature().find(name); - return (it != example.features().feature().end()) && +bool HasFeature<>(const string& key, const Features& features) { + return (features.feature().find(key) != features.feature().end()); +} + +template <> +bool HasFeature(const string& key, const Features& features) { + auto it = features.feature().find(key); + return (it != features.feature().end()) && (it->second.kind_case() == Feature::KindCase::kInt64List); } template <> -bool ExampleHasFeature(const string& name, const Example& example) { - auto it = example.features().feature().find(name); - return (it != example.features().feature().end()) && +bool HasFeature(const string& key, const Features& features) { + auto it = features.feature().find(key); + return (it != features.feature().end()) && (it->second.kind_case() == Feature::KindCase::kFloatList); } template <> -bool ExampleHasFeature(const string& name, const Example& example) { - auto it = example.features().feature().find(name); - return (it != example.features().feature().end()) && +bool HasFeature(const string& key, const Features& features) { + auto it = features.feature().find(key); + return (it != features.feature().end()) && (it->second.kind_case() == Feature::KindCase::kBytesList); } +bool HasFeatureList(const string& key, + const SequenceExample& sequence_example) { + auto& feature_list = sequence_example.feature_lists().feature_list(); + return (feature_list.find(key) != feature_list.end()); +} + template <> const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example) { - return example.features().feature().at(name).int64_list().value(); + const Feature& feature) { + return feature.int64_list().value(); } template <> protobuf::RepeatedField* GetFeatureValues( - const string& name, Example* example) { - return internal::ExampleFeature(name, example) - .mutable_int64_list() - ->mutable_value(); + Feature* feature) { + return feature->mutable_int64_list()->mutable_value(); } template <> const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example) { - return example.features().feature().at(name).float_list().value(); + const Feature& feature) { + return feature.float_list().value(); } template <> -protobuf::RepeatedField* GetFeatureValues(const string& name, - Example* example) { - return internal::ExampleFeature(name, example) - .mutable_float_list() - ->mutable_value(); +protobuf::RepeatedField* GetFeatureValues(Feature* feature) { + return feature->mutable_float_list()->mutable_value(); } template <> const protobuf::RepeatedPtrField& GetFeatureValues( - const string& name, const Example& example) { - return example.features().feature().at(name).bytes_list().value(); + const Feature& feature) { + return feature.bytes_list().value(); +} + +template <> +protobuf::RepeatedPtrField* GetFeatureValues(Feature* feature) { + return feature->mutable_bytes_list()->mutable_value(); +} + +const protobuf::RepeatedPtrField& GetFeatureList( + const string& key, const SequenceExample& sequence_example) { + return sequence_example.feature_lists().feature_list().at(key).feature(); +} + +protobuf::RepeatedPtrField* GetFeatureList( + const string& feature_list_key, SequenceExample* sequence_example) { + return (*sequence_example->mutable_feature_lists() + ->mutable_feature_list())[feature_list_key] + .mutable_feature(); +} + +template <> +Features* GetFeatures(Features* proto) { + return proto; +} + +template <> +Features* GetFeatures(Example* proto) { + return proto->mutable_features(); } template <> -protobuf::RepeatedPtrField* GetFeatureValues(const string& name, - Example* example) { - return internal::ExampleFeature(name, example) - .mutable_bytes_list() - ->mutable_value(); +const Features& GetFeatures(const Features& proto) { + return proto; } +template <> +const Features& GetFeatures(const Example& proto) { + return proto.features(); +} + +template <> +const protobuf::RepeatedField& GetFeatureValues( + const Feature& feature); + +template <> +protobuf::RepeatedField* GetFeatureValues( + Feature* feature); + +template <> +const protobuf::RepeatedField& GetFeatureValues( + const Feature& feature); + +template <> +protobuf::RepeatedField* GetFeatureValues(Feature* feature); + +template <> +const protobuf::RepeatedPtrField& GetFeatureValues( + const Feature& feature); + +template <> +protobuf::RepeatedPtrField* GetFeatureValues(Feature* feature); } // namespace tensorflow diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h index 4004411cb1..19d6d0d0d4 100644 --- a/tensorflow/core/example/feature_util.h +++ b/tensorflow/core/example/feature_util.h @@ -13,9 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// A set of lightweight wrappers which simplify access to Example features. +// A set of lightweight wrappers which simplify access to Feature protos. // // TensorFlow Example proto uses associative maps on top of oneof fields. +// SequenceExample proto uses associative map of FeatureList. // So accessing feature values is not very convenient. // // For example, to read a first value of integer feature "tag": @@ -42,9 +43,59 @@ limitations under the License. // (RepeatedPtrField for byte list). So refer to its documentation of // RepeatedField for full list of supported methods. // -// NOTE: It is also important to mention that due to the nature of oneof proto -// fields setting a feature of one type automatically clears all values stored -// as another type with the same feature name. +// NOTE: Due to the nature of oneof proto fields setting a feature of one type +// automatically clears all values stored as another type with the same feature +// key. +// +// This library also has tools to work with SequenceExample protos. +// +// To get a value from SequenceExample.context: +// int id = GetFeatureValues("tag", se.context()).Get(0); +// To add a value to the context: +// GetFeatureValues("tag", se.mutable_context())->Add(42); +// +// To add values to feature_lists: +// AppendFeatureValues({4.0}, +// GetFeatureList("movie_ratings", &se)->Add()); +// AppendFeatureValues({5.0, 3.0}, +// GetFeatureList("movie_ratings", &se)->Add()); +// This will create a feature list keyed as "images" with two features: +// feature_lists { +// feature_list { +// key: "images" +// value { +// feature { float_list { value: [4.0] } } +// feature { float_list { value: [5.0, 3.0] } } +// } +// } } +// +// Functions exposed by this library: +// HasFeature<[FeatureType]>(key, proto) -> bool +// Returns true if a feature with the specified key, and optionally +// FeatureType, belongs to the Features or Example proto. +// HasFeatureList(key, sequence_example) -> bool +// Returns true if SequenceExample has a feature_list with the key. +// GetFeatureValues(key, proto) -> RepeatedField +// Returns values for the specified key and the FeatureType. +// Supported types for the proto: Example, Features. +// GetFeatureList(key, sequence_example) -> RepeatedPtrField +// Returns Feature protos associated with a key. +// AppendFeatureValues(begin, end, feature) +// AppendFeatureValues(container or initializer_list, feature) +// Copies values into a Feature. +// AppendFeatureValues(begin, end, key, proto) +// AppendFeatureValues(container or initializer_list, key, proto) +// Copies values into Features and Example protos with the specified key. +// +// Auxiliary functions, it is unlikely you'll need to use them directly: +// GetFeatures(proto) -> Features +// A convenience function to get Features proto. +// Supported types for the proto: Example, Features. +// GetFeature(key, proto) -> Feature* +// Returns a Feature proto for the specified key, creates a new if +// necessary. Supported types for the proto: Example, Features. +// GetFeatureValues(feature) -> RepeatedField +// Returns values of the feature for the FeatureType. #ifndef TENSORFLOW_EXAMPLE_FEATURE_H_ #define TENSORFLOW_EXAMPLE_FEATURE_H_ @@ -62,10 +113,11 @@ namespace tensorflow { namespace internal { +// DEPRECATED: Use GetFeature instead. +// TODO(gorban): Update all clients in a followup CL. // Returns a reference to a feature corresponding to the name. // Note: it will create a new Feature if it is missing in the example. -::tensorflow::Feature& ExampleFeature(const string& name, - ::tensorflow::Example* example); +Feature& ExampleFeature(const string& name, Example* example); // Specializations of RepeatedFieldTrait define a type of RepeatedField // corresponding to a selected feature type. @@ -127,89 +179,135 @@ struct FeatureTrait< } // namespace internal -// Returns true if feature with the specified name belongs to the example proto. -// Doesn't check feature type. Note that specialized versions return false if -// the feature has a wrong type. -template -bool ExampleHasFeature(const string& name, const Example& example) { - return example.features().feature().find(name) != - example.features().feature().end(); -} +// Returns true if sequence_example has a feature_list with the specified key. +bool HasFeatureList(const string& key, const SequenceExample& sequence_example); + +// A family of template functions to return mutable Features proto from a +// container proto. Supported ProtoTypes: Example, Features. +template +Features* GetFeatures(ProtoType* proto); + +template +const Features& GetFeatures(const ProtoType& proto); // Base declaration of a family of template functions to return a read only -// repeated field corresponding to a feature with the specified name. +// repeated field of feature values. template const typename internal::RepeatedFieldTrait::Type& -GetFeatureValues(const string& name, const Example& example); +GetFeatureValues(const Feature& feature); + +// Returns a read only repeated field corresponding to a feature with the +// specified name and FeatureType. Supported ProtoTypes: Example, Features. +template +const typename internal::RepeatedFieldTrait::Type& +GetFeatureValues(const string& key, const ProtoType& proto) { + return GetFeatureValues(GetFeatures(proto).feature().at(key)); +} -// Base declaration of a family of template functions to return a mutable -// repeated field corresponding to a feature with the specified name. +// Returns a mutable repeated field of a feature values. template typename internal::RepeatedFieldTrait::Type* GetFeatureValues( - const string& name, Example* example); + Feature* feature); + +// Returns a mutable repeated field corresponding to a feature with the +// specified name and FeatureType. Supported ProtoTypes: Example, Features. +template +typename internal::RepeatedFieldTrait::Type* GetFeatureValues( + const string& key, ProtoType* proto) { + ::tensorflow::Feature& feature = + (*GetFeatures(proto)->mutable_feature())[key]; + return GetFeatureValues(&feature); +} + +// Returns a Feature proto for the specified key, creates a new if necessary. +// Supported types for the proto: Example, Features. +template +Feature* GetFeature(const string& key, ProtoType* proto) { + return &(*GetFeatures(proto)->mutable_feature())[key]; +} + +// Returns a repeated field with features corresponding to a feature_list key. +const protobuf::RepeatedPtrField& GetFeatureList( + const string& key, const SequenceExample& sequence_example); + +// Returns a mutable repeated field with features corresponding to a +// feature_list key. It will create a new FeatureList if necessary. +protobuf::RepeatedPtrField* GetFeatureList( + const string& feature_list_key, SequenceExample* sequence_example); -// Copies elements from the range, defined by [first, last) into a feature. template void AppendFeatureValues(IteratorType first, IteratorType last, - const string& name, Example* example) { + Feature* feature) { using FeatureType = typename internal::FeatureTrait< typename std::iterator_traits::value_type>::Type; - std::copy(first, last, protobuf::RepeatedFieldBackInserter( - GetFeatureValues(name, example))); + std::copy(first, last, + protobuf::RepeatedFieldBackInserter( + GetFeatureValues(feature))); +} + +template +void AppendFeatureValues(std::initializer_list container, + Feature* feature) { + AppendFeatureValues(container.begin(), container.end(), feature); } -// Copies all elements from the container into a feature. template -void AppendFeatureValues(const ContainerType& container, const string& name, - Example* example) { +void AppendFeatureValues(const ContainerType& container, Feature* feature) { using IteratorType = typename ContainerType::const_iterator; - AppendFeatureValues(container.begin(), container.end(), name, - example); + AppendFeatureValues(container.begin(), container.end(), + feature); } -// Copies all elements from the initializer list into a feature. -template +// Copies elements from the range, defined by [first, last) into the feature +// obtainable from the (proto, key) combination. +template +void AppendFeatureValues(IteratorType first, IteratorType last, + const string& key, ProtoType* proto) { + AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); +} + +// Copies all elements from the container into a feature. +template +void AppendFeatureValues(const ContainerType& container, const string& key, + ProtoType* proto) { + using IteratorType = typename ContainerType::const_iterator; + AppendFeatureValues(container.begin(), container.end(), key, + proto); +} + +// Copies all elements from the initializer list into a Feature contained by +// Features or Example proto. +template void AppendFeatureValues(std::initializer_list container, - const string& name, Example* example) { + const string& key, ProtoType* proto) { using IteratorType = typename std::initializer_list::const_iterator; - AppendFeatureValues(container.begin(), container.end(), name, - example); + AppendFeatureValues(container.begin(), container.end(), key, + proto); } -template <> -bool ExampleHasFeature(const string& name, - const Example& example); - -template <> -bool ExampleHasFeature(const string& name, const Example& example); - -template <> -bool ExampleHasFeature(const string& name, const Example& example); - -template <> -const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example); - -template <> -protobuf::RepeatedField* GetFeatureValues( - const string& name, Example* example); - -template <> -const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example); - -template <> -protobuf::RepeatedField* GetFeatureValues(const string& name, - Example* example); - -template <> -const protobuf::RepeatedPtrField& GetFeatureValues( - const string& name, const Example& example); +// Returns true if a feature with the specified key belongs to the Features. +// The template parameter pack accepts zero or one template argument - which +// is FeatureType. If the FeatureType not specified (zero template arguments) +// the function will not check the feature type. Otherwise it will return false +// if the feature has a wrong type. +template +bool HasFeature(const string& key, const Features& features); + +// Returns true if a feature with the specified key belongs to the Example. +// Doesn't check feature type if used without FeatureType, otherwise the +// specialized versions return false if the feature has a wrong type. +template +bool HasFeature(const string& key, const Example& example) { + return HasFeature(key, GetFeatures(example)); +}; -template <> -protobuf::RepeatedPtrField* GetFeatureValues(const string& name, - Example* example); +// DEPRECATED: use HasFeature instead. +// TODO(gorban): update all clients in a followup CL. +template +bool ExampleHasFeature(const string& key, const Example& example) { + return HasFeature(key, example); +} } // namespace tensorflow #endif // TENSORFLOW_EXAMPLE_FEATURE_H_ diff --git a/tensorflow/core/example/feature_util_test.cc b/tensorflow/core/example/feature_util_test.cc index eb7b90af1b..1036e28652 100644 --- a/tensorflow/core/example/feature_util_test.cc +++ b/tensorflow/core/example/feature_util_test.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - #include "tensorflow/core/example/feature_util.h" #include @@ -38,6 +37,16 @@ TEST(GetFeatureValuesInt64Test, ReadsASingleValue) { EXPECT_EQ(42, tag.Get(0)); } +TEST(GetFeatureValuesInt64Test, ReadsASingleValueFromFeature) { + Feature feature; + feature.mutable_int64_list()->add_value(42); + + auto values = GetFeatureValues(feature); + + ASSERT_EQ(1, values.size()); + EXPECT_EQ(42, values.Get(0)); +} + TEST(GetFeatureValuesInt64Test, WritesASingleValue) { Example example; @@ -48,25 +57,33 @@ TEST(GetFeatureValuesInt64Test, WritesASingleValue) { EXPECT_EQ(42, example.features().feature().at("tag").int64_list().value(0)); } +TEST(GetFeatureValuesInt64Test, WritesASingleValueToFeature) { + Feature feature; + + GetFeatureValues(&feature)->Add(42); + + ASSERT_EQ(1, feature.int64_list().value_size()); + EXPECT_EQ(42, feature.int64_list().value(0)); +} + TEST(GetFeatureValuesInt64Test, CheckUntypedFieldExistence) { Example example; - - EXPECT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(0); - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); } TEST(GetFeatureValuesInt64Test, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(3.14); - ASSERT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(42); - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_EQ(42, tag_ro.Get(0)); @@ -87,6 +104,16 @@ TEST(GetFeatureValuesInt64Test, CopyIterableToAField) { EXPECT_EQ(3, tag_ro.Get(2)); } +TEST(GetFeatureValuesFloatTest, ReadsASingleValueFromFeature) { + Feature feature; + feature.mutable_float_list()->add_value(3.14); + + auto values = GetFeatureValues(feature); + + ASSERT_EQ(1, values.size()); + EXPECT_NEAR(3.14, values.Get(0), kTolerance); +} + TEST(GetFeatureValuesFloatTest, ReadsASingleValue) { Example example; (*example.mutable_features()->mutable_feature())["tag"] @@ -99,6 +126,15 @@ TEST(GetFeatureValuesFloatTest, ReadsASingleValue) { EXPECT_NEAR(3.14, tag.Get(0), kTolerance); } +TEST(GetFeatureValuesFloatTest, WritesASingleValueToFeature) { + Feature feature; + + GetFeatureValues(&feature)->Add(3.14); + + ASSERT_EQ(1, feature.float_list().value_size()); + EXPECT_NEAR(3.14, feature.float_list().value(0), kTolerance); +} + TEST(GetFeatureValuesFloatTest, WritesASingleValue) { Example example; @@ -115,16 +151,26 @@ TEST(GetFeatureValuesFloatTest, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(42); - ASSERT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(3.14); - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_NEAR(3.14, tag_ro.Get(0), kTolerance); } +TEST(GetFeatureValuesStringTest, ReadsASingleValueFromFeature) { + Feature feature; + feature.mutable_bytes_list()->add_value("FOO"); + + auto values = GetFeatureValues(feature); + + ASSERT_EQ(1, values.size()); + EXPECT_EQ("FOO", values.Get(0)); +} + TEST(GetFeatureValuesStringTest, ReadsASingleValue) { Example example; (*example.mutable_features()->mutable_feature())["tag"] @@ -137,6 +183,15 @@ TEST(GetFeatureValuesStringTest, ReadsASingleValue) { EXPECT_EQ("FOO", tag.Get(0)); } +TEST(GetFeatureValuesStringTest, WritesASingleValueToFeature) { + Feature feature; + + *GetFeatureValues(&feature)->Add() = "FOO"; + + ASSERT_EQ(1, feature.bytes_list().value_size()); + EXPECT_EQ("FOO", feature.bytes_list().value(0)); +} + TEST(GetFeatureValuesStringTest, WritesASingleValue) { Example example; @@ -148,15 +203,15 @@ TEST(GetFeatureValuesStringTest, WritesASingleValue) { example.features().feature().at("tag").bytes_list().value(0)); } -TEST(GetFeatureValuesBytesTest, CheckTypedFieldExistence) { +TEST(GetFeatureValuesStringTest, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(42); - ASSERT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); *GetFeatureValues("tag", &example)->Add() = "FOO"; - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_EQ("FOO", tag_ro.Get(0)); @@ -228,5 +283,146 @@ TEST(AppendFeatureValuesTest, StringVariablesUsingInitializerList) { EXPECT_EQ("BAZ", tag_ro.Get(2)); } +TEST(SequenceExampleTest, ReadsASingleValueFromContext) { + SequenceExample se; + (*se.mutable_context()->mutable_feature())["tag"] + .mutable_int64_list() + ->add_value(42); + + auto values = GetFeatureValues("tag", se.context()); + + ASSERT_EQ(1, values.size()); + EXPECT_EQ(42, values.Get(0)); +} + +TEST(SequenceExampleTest, WritesASingleValueToContext) { + SequenceExample se; + + GetFeatureValues("tag", se.mutable_context())->Add(42); + + ASSERT_EQ(1, se.context().feature().at("tag").int64_list().value_size()); + EXPECT_EQ(42, se.context().feature().at("tag").int64_list().value(0)); +} + +TEST(SequenceExampleTest, AppendFeatureValuesToContextSingleArg) { + SequenceExample se; + + AppendFeatureValues({1.1, 2.2, 3.3}, "tag", se.mutable_context()); + + auto tag_ro = GetFeatureValues("tag", se.context()); + ASSERT_EQ(3, tag_ro.size()); + EXPECT_NEAR(1.1, tag_ro.Get(0), kTolerance); + EXPECT_NEAR(2.2, tag_ro.Get(1), kTolerance); + EXPECT_NEAR(3.3, tag_ro.Get(2), kTolerance); +} + +TEST(SequenceExampleTest, CheckTypedFieldExistence) { + SequenceExample se; + + GetFeatureValues("tag", se.mutable_context())->Add(3.14); + ASSERT_FALSE(HasFeature("tag", se.context())); + + GetFeatureValues("tag", se.mutable_context())->Add(42); + + EXPECT_TRUE(HasFeature("tag", se.context())); + auto tag_ro = GetFeatureValues("tag", se.context()); + ASSERT_EQ(1, tag_ro.size()); + EXPECT_EQ(42, tag_ro.Get(0)); +} + +TEST(SequenceExampleTest, ReturnsExistingFeatureLists) { + SequenceExample se; + (*se.mutable_feature_lists()->mutable_feature_list())["tag"] + .mutable_feature() + ->Add(); + + auto feature = GetFeatureList("tag", se); + + ASSERT_EQ(1, feature.size()); +} + +TEST(SequenceExampleTest, CreatesNewFeatureLists) { + SequenceExample se; + + GetFeatureList("tag", &se)->Add(); + + EXPECT_EQ(1, se.feature_lists().feature_list().at("tag").feature_size()); +} + +TEST(SequenceExampleTest, CheckFeatureListExistence) { + SequenceExample se; + ASSERT_FALSE(HasFeatureList("tag", se)); + + GetFeatureList("tag", &se)->Add(); + + ASSERT_TRUE(HasFeatureList("tag", se)); +} + +TEST(SequenceExampleTest, AppendFeatureValuesWithInitializerList) { + SequenceExample se; + + AppendFeatureValues({1, 2, 3}, "ids", se.mutable_context()); + AppendFeatureValues({"cam1-0", "cam2-0"}, + GetFeatureList("images", &se)->Add()); + AppendFeatureValues({"cam1-1", "cam2-2"}, + GetFeatureList("images", &se)->Add()); + + EXPECT_EQ(se.DebugString(), + "context {\n" + " feature {\n" + " key: \"ids\"\n" + " value {\n" + " int64_list {\n" + " value: 1\n" + " value: 2\n" + " value: 3\n" + " }\n" + " }\n" + " }\n" + "}\n" + "feature_lists {\n" + " feature_list {\n" + " key: \"images\"\n" + " value {\n" + " feature {\n" + " bytes_list {\n" + " value: \"cam1-0\"\n" + " value: \"cam2-0\"\n" + " }\n" + " }\n" + " feature {\n" + " bytes_list {\n" + " value: \"cam1-1\"\n" + " value: \"cam2-2\"\n" + " }\n" + " }\n" + " }\n" + " }\n" + "}\n"); +} + +TEST(SequenceExampleTest, AppendFeatureValuesWithVectors) { + SequenceExample se; + + std::vector readings{1.0, 2.5, 5.0}; + AppendFeatureValues(readings, GetFeatureList("movie_ratings", &se)->Add()); + + EXPECT_EQ(se.DebugString(), + "feature_lists {\n" + " feature_list {\n" + " key: \"movie_ratings\"\n" + " value {\n" + " feature {\n" + " float_list {\n" + " value: 1\n" + " value: 2.5\n" + " value: 5\n" + " }\n" + " }\n" + " }\n" + " }\n" + "}\n"); +} + } // namespace } // namespace tensorflow -- GitLab From 8b05d128c4accb40423eafca7118fe6fec772ef8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 10:36:16 -0700 Subject: [PATCH 160/294] Shard some tests to prevent timeouts. PiperOrigin-RevId: 167293429 --- tensorflow/python/estimator/BUILD | 9 ++++----- tensorflow/python/kernel_tests/BUILD | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 83eeeb35b6..167f9b1054 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -148,6 +148,7 @@ py_test( name = "dnn_test", size = "medium", srcs = ["canned/dnn_test.py"], + shard_count = 4, srcs_version = "PY2AND3", tags = ["no_pip"], deps = [ @@ -201,7 +202,7 @@ py_test( name = "dnn_linear_combined_test", size = "medium", srcs = ["canned/dnn_linear_combined_test.py"], - shard_count = 4, + shard_count = 8, srcs_version = "PY2AND3", tags = ["no_pip"], deps = [ @@ -552,11 +553,9 @@ py_test( name = "linear_test", size = "medium", srcs = ["canned/linear_test.py"], + shard_count = 4, srcs_version = "PY2AND3", - tags = [ - "no_pip", - "noasan", # times out b/63680444 - ], + tags = ["no_pip"], deps = [ ":linear", ":linear_testing_utils", diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 4fa1e1fee8..ed3f02aedd 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2154,7 +2154,7 @@ cuda_py_test( "//tensorflow/python:nn_grad", "//tensorflow/python:nn_ops", ], - tags = ["noasan"], # times out b/63680444 + shard_count = 2, ) cuda_py_test( -- GitLab From bfb6e8e6a06ca94dc4792588ce0da705efca81cf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 10:52:15 -0700 Subject: [PATCH 161/294] Add `gan` import to cmake. PiperOrigin-RevId: 167295726 --- tensorflow/contrib/cmake/tf_python.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 4802309937..1b706159a3 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -315,6 +315,7 @@ add_python_module("tensorflow/contrib/framework/ops") add_python_module("tensorflow/contrib/framework/python") add_python_module("tensorflow/contrib/framework/python/framework") add_python_module("tensorflow/contrib/framework/python/ops") +add_python_module("tensorflow/contrib/gan") add_python_module("tensorflow/contrib/graph_editor") add_python_module("tensorflow/contrib/graph_editor/examples") add_python_module("tensorflow/contrib/graph_editor/tests") -- GitLab From 046533f852bd7d8bffd1a0f19cc1991cdedfc92b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 11:29:50 -0700 Subject: [PATCH 162/294] Automated g4 rollback of changelist 167286643 PiperOrigin-RevId: 167301509 --- tensorflow/core/example/feature_util.cc | 124 +++------- tensorflow/core/example/feature_util.h | 228 ++++++------------- tensorflow/core/example/feature_util_test.cc | 218 +----------------- 3 files changed, 112 insertions(+), 458 deletions(-) diff --git a/tensorflow/core/example/feature_util.cc b/tensorflow/core/example/feature_util.cc index f0593ede82..6f3cc6c6c5 100644 --- a/tensorflow/core/example/feature_util.cc +++ b/tensorflow/core/example/feature_util.cc @@ -18,129 +18,77 @@ limitations under the License. namespace tensorflow { namespace internal { -Feature& ExampleFeature(const string& name, Example* example) { - return *GetFeature(name, example); -} - -} // namespace internal -template <> -bool HasFeature<>(const string& key, const Features& features) { - return (features.feature().find(key) != features.feature().end()); +::tensorflow::Feature& ExampleFeature(const string& name, + ::tensorflow::Example* example) { + ::tensorflow::Features* features = example->mutable_features(); + return (*features->mutable_feature())[name]; } +} // namespace internal + template <> -bool HasFeature(const string& key, const Features& features) { - auto it = features.feature().find(key); - return (it != features.feature().end()) && +bool ExampleHasFeature(const string& name, + const Example& example) { + auto it = example.features().feature().find(name); + return (it != example.features().feature().end()) && (it->second.kind_case() == Feature::KindCase::kInt64List); } template <> -bool HasFeature(const string& key, const Features& features) { - auto it = features.feature().find(key); - return (it != features.feature().end()) && +bool ExampleHasFeature(const string& name, const Example& example) { + auto it = example.features().feature().find(name); + return (it != example.features().feature().end()) && (it->second.kind_case() == Feature::KindCase::kFloatList); } template <> -bool HasFeature(const string& key, const Features& features) { - auto it = features.feature().find(key); - return (it != features.feature().end()) && +bool ExampleHasFeature(const string& name, const Example& example) { + auto it = example.features().feature().find(name); + return (it != example.features().feature().end()) && (it->second.kind_case() == Feature::KindCase::kBytesList); } -bool HasFeatureList(const string& key, - const SequenceExample& sequence_example) { - auto& feature_list = sequence_example.feature_lists().feature_list(); - return (feature_list.find(key) != feature_list.end()); -} - template <> const protobuf::RepeatedField& GetFeatureValues( - const Feature& feature) { - return feature.int64_list().value(); + const string& name, const Example& example) { + return example.features().feature().at(name).int64_list().value(); } template <> protobuf::RepeatedField* GetFeatureValues( - Feature* feature) { - return feature->mutable_int64_list()->mutable_value(); + const string& name, Example* example) { + return internal::ExampleFeature(name, example) + .mutable_int64_list() + ->mutable_value(); } template <> const protobuf::RepeatedField& GetFeatureValues( - const Feature& feature) { - return feature.float_list().value(); + const string& name, const Example& example) { + return example.features().feature().at(name).float_list().value(); } template <> -protobuf::RepeatedField* GetFeatureValues(Feature* feature) { - return feature->mutable_float_list()->mutable_value(); +protobuf::RepeatedField* GetFeatureValues(const string& name, + Example* example) { + return internal::ExampleFeature(name, example) + .mutable_float_list() + ->mutable_value(); } template <> const protobuf::RepeatedPtrField& GetFeatureValues( - const Feature& feature) { - return feature.bytes_list().value(); -} - -template <> -protobuf::RepeatedPtrField* GetFeatureValues(Feature* feature) { - return feature->mutable_bytes_list()->mutable_value(); -} - -const protobuf::RepeatedPtrField& GetFeatureList( - const string& key, const SequenceExample& sequence_example) { - return sequence_example.feature_lists().feature_list().at(key).feature(); -} - -protobuf::RepeatedPtrField* GetFeatureList( - const string& feature_list_key, SequenceExample* sequence_example) { - return (*sequence_example->mutable_feature_lists() - ->mutable_feature_list())[feature_list_key] - .mutable_feature(); -} - -template <> -Features* GetFeatures(Features* proto) { - return proto; -} - -template <> -Features* GetFeatures(Example* proto) { - return proto->mutable_features(); + const string& name, const Example& example) { + return example.features().feature().at(name).bytes_list().value(); } template <> -const Features& GetFeatures(const Features& proto) { - return proto; +protobuf::RepeatedPtrField* GetFeatureValues(const string& name, + Example* example) { + return internal::ExampleFeature(name, example) + .mutable_bytes_list() + ->mutable_value(); } -template <> -const Features& GetFeatures(const Example& proto) { - return proto.features(); -} - -template <> -const protobuf::RepeatedField& GetFeatureValues( - const Feature& feature); - -template <> -protobuf::RepeatedField* GetFeatureValues( - Feature* feature); - -template <> -const protobuf::RepeatedField& GetFeatureValues( - const Feature& feature); - -template <> -protobuf::RepeatedField* GetFeatureValues(Feature* feature); - -template <> -const protobuf::RepeatedPtrField& GetFeatureValues( - const Feature& feature); - -template <> -protobuf::RepeatedPtrField* GetFeatureValues(Feature* feature); } // namespace tensorflow diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h index 19d6d0d0d4..4004411cb1 100644 --- a/tensorflow/core/example/feature_util.h +++ b/tensorflow/core/example/feature_util.h @@ -13,10 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// A set of lightweight wrappers which simplify access to Feature protos. +// A set of lightweight wrappers which simplify access to Example features. // // TensorFlow Example proto uses associative maps on top of oneof fields. -// SequenceExample proto uses associative map of FeatureList. // So accessing feature values is not very convenient. // // For example, to read a first value of integer feature "tag": @@ -43,59 +42,9 @@ limitations under the License. // (RepeatedPtrField for byte list). So refer to its documentation of // RepeatedField for full list of supported methods. // -// NOTE: Due to the nature of oneof proto fields setting a feature of one type -// automatically clears all values stored as another type with the same feature -// key. -// -// This library also has tools to work with SequenceExample protos. -// -// To get a value from SequenceExample.context: -// int id = GetFeatureValues("tag", se.context()).Get(0); -// To add a value to the context: -// GetFeatureValues("tag", se.mutable_context())->Add(42); -// -// To add values to feature_lists: -// AppendFeatureValues({4.0}, -// GetFeatureList("movie_ratings", &se)->Add()); -// AppendFeatureValues({5.0, 3.0}, -// GetFeatureList("movie_ratings", &se)->Add()); -// This will create a feature list keyed as "images" with two features: -// feature_lists { -// feature_list { -// key: "images" -// value { -// feature { float_list { value: [4.0] } } -// feature { float_list { value: [5.0, 3.0] } } -// } -// } } -// -// Functions exposed by this library: -// HasFeature<[FeatureType]>(key, proto) -> bool -// Returns true if a feature with the specified key, and optionally -// FeatureType, belongs to the Features or Example proto. -// HasFeatureList(key, sequence_example) -> bool -// Returns true if SequenceExample has a feature_list with the key. -// GetFeatureValues(key, proto) -> RepeatedField -// Returns values for the specified key and the FeatureType. -// Supported types for the proto: Example, Features. -// GetFeatureList(key, sequence_example) -> RepeatedPtrField -// Returns Feature protos associated with a key. -// AppendFeatureValues(begin, end, feature) -// AppendFeatureValues(container or initializer_list, feature) -// Copies values into a Feature. -// AppendFeatureValues(begin, end, key, proto) -// AppendFeatureValues(container or initializer_list, key, proto) -// Copies values into Features and Example protos with the specified key. -// -// Auxiliary functions, it is unlikely you'll need to use them directly: -// GetFeatures(proto) -> Features -// A convenience function to get Features proto. -// Supported types for the proto: Example, Features. -// GetFeature(key, proto) -> Feature* -// Returns a Feature proto for the specified key, creates a new if -// necessary. Supported types for the proto: Example, Features. -// GetFeatureValues(feature) -> RepeatedField -// Returns values of the feature for the FeatureType. +// NOTE: It is also important to mention that due to the nature of oneof proto +// fields setting a feature of one type automatically clears all values stored +// as another type with the same feature name. #ifndef TENSORFLOW_EXAMPLE_FEATURE_H_ #define TENSORFLOW_EXAMPLE_FEATURE_H_ @@ -113,11 +62,10 @@ namespace tensorflow { namespace internal { -// DEPRECATED: Use GetFeature instead. -// TODO(gorban): Update all clients in a followup CL. // Returns a reference to a feature corresponding to the name. // Note: it will create a new Feature if it is missing in the example. -Feature& ExampleFeature(const string& name, Example* example); +::tensorflow::Feature& ExampleFeature(const string& name, + ::tensorflow::Example* example); // Specializations of RepeatedFieldTrait define a type of RepeatedField // corresponding to a selected feature type. @@ -179,135 +127,89 @@ struct FeatureTrait< } // namespace internal -// Returns true if sequence_example has a feature_list with the specified key. -bool HasFeatureList(const string& key, const SequenceExample& sequence_example); - -// A family of template functions to return mutable Features proto from a -// container proto. Supported ProtoTypes: Example, Features. -template -Features* GetFeatures(ProtoType* proto); - -template -const Features& GetFeatures(const ProtoType& proto); +// Returns true if feature with the specified name belongs to the example proto. +// Doesn't check feature type. Note that specialized versions return false if +// the feature has a wrong type. +template +bool ExampleHasFeature(const string& name, const Example& example) { + return example.features().feature().find(name) != + example.features().feature().end(); +} // Base declaration of a family of template functions to return a read only -// repeated field of feature values. +// repeated field corresponding to a feature with the specified name. template const typename internal::RepeatedFieldTrait::Type& -GetFeatureValues(const Feature& feature); - -// Returns a read only repeated field corresponding to a feature with the -// specified name and FeatureType. Supported ProtoTypes: Example, Features. -template -const typename internal::RepeatedFieldTrait::Type& -GetFeatureValues(const string& key, const ProtoType& proto) { - return GetFeatureValues(GetFeatures(proto).feature().at(key)); -} +GetFeatureValues(const string& name, const Example& example); -// Returns a mutable repeated field of a feature values. +// Base declaration of a family of template functions to return a mutable +// repeated field corresponding to a feature with the specified name. template typename internal::RepeatedFieldTrait::Type* GetFeatureValues( - Feature* feature); - -// Returns a mutable repeated field corresponding to a feature with the -// specified name and FeatureType. Supported ProtoTypes: Example, Features. -template -typename internal::RepeatedFieldTrait::Type* GetFeatureValues( - const string& key, ProtoType* proto) { - ::tensorflow::Feature& feature = - (*GetFeatures(proto)->mutable_feature())[key]; - return GetFeatureValues(&feature); -} - -// Returns a Feature proto for the specified key, creates a new if necessary. -// Supported types for the proto: Example, Features. -template -Feature* GetFeature(const string& key, ProtoType* proto) { - return &(*GetFeatures(proto)->mutable_feature())[key]; -} - -// Returns a repeated field with features corresponding to a feature_list key. -const protobuf::RepeatedPtrField& GetFeatureList( - const string& key, const SequenceExample& sequence_example); - -// Returns a mutable repeated field with features corresponding to a -// feature_list key. It will create a new FeatureList if necessary. -protobuf::RepeatedPtrField* GetFeatureList( - const string& feature_list_key, SequenceExample* sequence_example); + const string& name, Example* example); +// Copies elements from the range, defined by [first, last) into a feature. template void AppendFeatureValues(IteratorType first, IteratorType last, - Feature* feature) { + const string& name, Example* example) { using FeatureType = typename internal::FeatureTrait< typename std::iterator_traits::value_type>::Type; - std::copy(first, last, - protobuf::RepeatedFieldBackInserter( - GetFeatureValues(feature))); -} - -template -void AppendFeatureValues(std::initializer_list container, - Feature* feature) { - AppendFeatureValues(container.begin(), container.end(), feature); -} - -template -void AppendFeatureValues(const ContainerType& container, Feature* feature) { - using IteratorType = typename ContainerType::const_iterator; - AppendFeatureValues(container.begin(), container.end(), - feature); -} - -// Copies elements from the range, defined by [first, last) into the feature -// obtainable from the (proto, key) combination. -template -void AppendFeatureValues(IteratorType first, IteratorType last, - const string& key, ProtoType* proto) { - AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); + std::copy(first, last, protobuf::RepeatedFieldBackInserter( + GetFeatureValues(name, example))); } // Copies all elements from the container into a feature. -template -void AppendFeatureValues(const ContainerType& container, const string& key, - ProtoType* proto) { +template +void AppendFeatureValues(const ContainerType& container, const string& name, + Example* example) { using IteratorType = typename ContainerType::const_iterator; - AppendFeatureValues(container.begin(), container.end(), key, - proto); + AppendFeatureValues(container.begin(), container.end(), name, + example); } -// Copies all elements from the initializer list into a Feature contained by -// Features or Example proto. -template +// Copies all elements from the initializer list into a feature. +template void AppendFeatureValues(std::initializer_list container, - const string& key, ProtoType* proto) { + const string& name, Example* example) { using IteratorType = typename std::initializer_list::const_iterator; - AppendFeatureValues(container.begin(), container.end(), key, - proto); + AppendFeatureValues(container.begin(), container.end(), name, + example); } -// Returns true if a feature with the specified key belongs to the Features. -// The template parameter pack accepts zero or one template argument - which -// is FeatureType. If the FeatureType not specified (zero template arguments) -// the function will not check the feature type. Otherwise it will return false -// if the feature has a wrong type. -template -bool HasFeature(const string& key, const Features& features); - -// Returns true if a feature with the specified key belongs to the Example. -// Doesn't check feature type if used without FeatureType, otherwise the -// specialized versions return false if the feature has a wrong type. -template -bool HasFeature(const string& key, const Example& example) { - return HasFeature(key, GetFeatures(example)); -}; +template <> +bool ExampleHasFeature(const string& name, + const Example& example); -// DEPRECATED: use HasFeature instead. -// TODO(gorban): update all clients in a followup CL. -template -bool ExampleHasFeature(const string& key, const Example& example) { - return HasFeature(key, example); -} +template <> +bool ExampleHasFeature(const string& name, const Example& example); + +template <> +bool ExampleHasFeature(const string& name, const Example& example); + +template <> +const protobuf::RepeatedField& GetFeatureValues( + const string& name, const Example& example); + +template <> +protobuf::RepeatedField* GetFeatureValues( + const string& name, Example* example); + +template <> +const protobuf::RepeatedField& GetFeatureValues( + const string& name, const Example& example); + +template <> +protobuf::RepeatedField* GetFeatureValues(const string& name, + Example* example); + +template <> +const protobuf::RepeatedPtrField& GetFeatureValues( + const string& name, const Example& example); + +template <> +protobuf::RepeatedPtrField* GetFeatureValues(const string& name, + Example* example); } // namespace tensorflow #endif // TENSORFLOW_EXAMPLE_FEATURE_H_ diff --git a/tensorflow/core/example/feature_util_test.cc b/tensorflow/core/example/feature_util_test.cc index 1036e28652..eb7b90af1b 100644 --- a/tensorflow/core/example/feature_util_test.cc +++ b/tensorflow/core/example/feature_util_test.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ + #include "tensorflow/core/example/feature_util.h" #include @@ -37,16 +38,6 @@ TEST(GetFeatureValuesInt64Test, ReadsASingleValue) { EXPECT_EQ(42, tag.Get(0)); } -TEST(GetFeatureValuesInt64Test, ReadsASingleValueFromFeature) { - Feature feature; - feature.mutable_int64_list()->add_value(42); - - auto values = GetFeatureValues(feature); - - ASSERT_EQ(1, values.size()); - EXPECT_EQ(42, values.Get(0)); -} - TEST(GetFeatureValuesInt64Test, WritesASingleValue) { Example example; @@ -57,33 +48,25 @@ TEST(GetFeatureValuesInt64Test, WritesASingleValue) { EXPECT_EQ(42, example.features().feature().at("tag").int64_list().value(0)); } -TEST(GetFeatureValuesInt64Test, WritesASingleValueToFeature) { - Feature feature; - - GetFeatureValues(&feature)->Add(42); - - ASSERT_EQ(1, feature.int64_list().value_size()); - EXPECT_EQ(42, feature.int64_list().value(0)); -} - TEST(GetFeatureValuesInt64Test, CheckUntypedFieldExistence) { Example example; - ASSERT_FALSE(HasFeature("tag", example)); + + EXPECT_FALSE(ExampleHasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(0); - EXPECT_TRUE(HasFeature("tag", example)); + EXPECT_TRUE(ExampleHasFeature("tag", example)); } TEST(GetFeatureValuesInt64Test, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(3.14); - ASSERT_FALSE(HasFeature("tag", example)); + ASSERT_FALSE(ExampleHasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(42); - EXPECT_TRUE(HasFeature("tag", example)); + EXPECT_TRUE(ExampleHasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_EQ(42, tag_ro.Get(0)); @@ -104,16 +87,6 @@ TEST(GetFeatureValuesInt64Test, CopyIterableToAField) { EXPECT_EQ(3, tag_ro.Get(2)); } -TEST(GetFeatureValuesFloatTest, ReadsASingleValueFromFeature) { - Feature feature; - feature.mutable_float_list()->add_value(3.14); - - auto values = GetFeatureValues(feature); - - ASSERT_EQ(1, values.size()); - EXPECT_NEAR(3.14, values.Get(0), kTolerance); -} - TEST(GetFeatureValuesFloatTest, ReadsASingleValue) { Example example; (*example.mutable_features()->mutable_feature())["tag"] @@ -126,15 +99,6 @@ TEST(GetFeatureValuesFloatTest, ReadsASingleValue) { EXPECT_NEAR(3.14, tag.Get(0), kTolerance); } -TEST(GetFeatureValuesFloatTest, WritesASingleValueToFeature) { - Feature feature; - - GetFeatureValues(&feature)->Add(3.14); - - ASSERT_EQ(1, feature.float_list().value_size()); - EXPECT_NEAR(3.14, feature.float_list().value(0), kTolerance); -} - TEST(GetFeatureValuesFloatTest, WritesASingleValue) { Example example; @@ -151,26 +115,16 @@ TEST(GetFeatureValuesFloatTest, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(42); - ASSERT_FALSE(HasFeature("tag", example)); + ASSERT_FALSE(ExampleHasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(3.14); - EXPECT_TRUE(HasFeature("tag", example)); + EXPECT_TRUE(ExampleHasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_NEAR(3.14, tag_ro.Get(0), kTolerance); } -TEST(GetFeatureValuesStringTest, ReadsASingleValueFromFeature) { - Feature feature; - feature.mutable_bytes_list()->add_value("FOO"); - - auto values = GetFeatureValues(feature); - - ASSERT_EQ(1, values.size()); - EXPECT_EQ("FOO", values.Get(0)); -} - TEST(GetFeatureValuesStringTest, ReadsASingleValue) { Example example; (*example.mutable_features()->mutable_feature())["tag"] @@ -183,15 +137,6 @@ TEST(GetFeatureValuesStringTest, ReadsASingleValue) { EXPECT_EQ("FOO", tag.Get(0)); } -TEST(GetFeatureValuesStringTest, WritesASingleValueToFeature) { - Feature feature; - - *GetFeatureValues(&feature)->Add() = "FOO"; - - ASSERT_EQ(1, feature.bytes_list().value_size()); - EXPECT_EQ("FOO", feature.bytes_list().value(0)); -} - TEST(GetFeatureValuesStringTest, WritesASingleValue) { Example example; @@ -203,15 +148,15 @@ TEST(GetFeatureValuesStringTest, WritesASingleValue) { example.features().feature().at("tag").bytes_list().value(0)); } -TEST(GetFeatureValuesStringTest, CheckTypedFieldExistence) { +TEST(GetFeatureValuesBytesTest, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(42); - ASSERT_FALSE(HasFeature("tag", example)); + ASSERT_FALSE(ExampleHasFeature("tag", example)); *GetFeatureValues("tag", &example)->Add() = "FOO"; - EXPECT_TRUE(HasFeature("tag", example)); + EXPECT_TRUE(ExampleHasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_EQ("FOO", tag_ro.Get(0)); @@ -283,146 +228,5 @@ TEST(AppendFeatureValuesTest, StringVariablesUsingInitializerList) { EXPECT_EQ("BAZ", tag_ro.Get(2)); } -TEST(SequenceExampleTest, ReadsASingleValueFromContext) { - SequenceExample se; - (*se.mutable_context()->mutable_feature())["tag"] - .mutable_int64_list() - ->add_value(42); - - auto values = GetFeatureValues("tag", se.context()); - - ASSERT_EQ(1, values.size()); - EXPECT_EQ(42, values.Get(0)); -} - -TEST(SequenceExampleTest, WritesASingleValueToContext) { - SequenceExample se; - - GetFeatureValues("tag", se.mutable_context())->Add(42); - - ASSERT_EQ(1, se.context().feature().at("tag").int64_list().value_size()); - EXPECT_EQ(42, se.context().feature().at("tag").int64_list().value(0)); -} - -TEST(SequenceExampleTest, AppendFeatureValuesToContextSingleArg) { - SequenceExample se; - - AppendFeatureValues({1.1, 2.2, 3.3}, "tag", se.mutable_context()); - - auto tag_ro = GetFeatureValues("tag", se.context()); - ASSERT_EQ(3, tag_ro.size()); - EXPECT_NEAR(1.1, tag_ro.Get(0), kTolerance); - EXPECT_NEAR(2.2, tag_ro.Get(1), kTolerance); - EXPECT_NEAR(3.3, tag_ro.Get(2), kTolerance); -} - -TEST(SequenceExampleTest, CheckTypedFieldExistence) { - SequenceExample se; - - GetFeatureValues("tag", se.mutable_context())->Add(3.14); - ASSERT_FALSE(HasFeature("tag", se.context())); - - GetFeatureValues("tag", se.mutable_context())->Add(42); - - EXPECT_TRUE(HasFeature("tag", se.context())); - auto tag_ro = GetFeatureValues("tag", se.context()); - ASSERT_EQ(1, tag_ro.size()); - EXPECT_EQ(42, tag_ro.Get(0)); -} - -TEST(SequenceExampleTest, ReturnsExistingFeatureLists) { - SequenceExample se; - (*se.mutable_feature_lists()->mutable_feature_list())["tag"] - .mutable_feature() - ->Add(); - - auto feature = GetFeatureList("tag", se); - - ASSERT_EQ(1, feature.size()); -} - -TEST(SequenceExampleTest, CreatesNewFeatureLists) { - SequenceExample se; - - GetFeatureList("tag", &se)->Add(); - - EXPECT_EQ(1, se.feature_lists().feature_list().at("tag").feature_size()); -} - -TEST(SequenceExampleTest, CheckFeatureListExistence) { - SequenceExample se; - ASSERT_FALSE(HasFeatureList("tag", se)); - - GetFeatureList("tag", &se)->Add(); - - ASSERT_TRUE(HasFeatureList("tag", se)); -} - -TEST(SequenceExampleTest, AppendFeatureValuesWithInitializerList) { - SequenceExample se; - - AppendFeatureValues({1, 2, 3}, "ids", se.mutable_context()); - AppendFeatureValues({"cam1-0", "cam2-0"}, - GetFeatureList("images", &se)->Add()); - AppendFeatureValues({"cam1-1", "cam2-2"}, - GetFeatureList("images", &se)->Add()); - - EXPECT_EQ(se.DebugString(), - "context {\n" - " feature {\n" - " key: \"ids\"\n" - " value {\n" - " int64_list {\n" - " value: 1\n" - " value: 2\n" - " value: 3\n" - " }\n" - " }\n" - " }\n" - "}\n" - "feature_lists {\n" - " feature_list {\n" - " key: \"images\"\n" - " value {\n" - " feature {\n" - " bytes_list {\n" - " value: \"cam1-0\"\n" - " value: \"cam2-0\"\n" - " }\n" - " }\n" - " feature {\n" - " bytes_list {\n" - " value: \"cam1-1\"\n" - " value: \"cam2-2\"\n" - " }\n" - " }\n" - " }\n" - " }\n" - "}\n"); -} - -TEST(SequenceExampleTest, AppendFeatureValuesWithVectors) { - SequenceExample se; - - std::vector readings{1.0, 2.5, 5.0}; - AppendFeatureValues(readings, GetFeatureList("movie_ratings", &se)->Add()); - - EXPECT_EQ(se.DebugString(), - "feature_lists {\n" - " feature_list {\n" - " key: \"movie_ratings\"\n" - " value {\n" - " feature {\n" - " float_list {\n" - " value: 1\n" - " value: 2.5\n" - " value: 5\n" - " }\n" - " }\n" - " }\n" - " }\n" - "}\n"); -} - } // namespace } // namespace tensorflow -- GitLab From 9fe71e08a56035dd547f7f9e9f7fdc0b093d0c96 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 12:07:05 -0700 Subject: [PATCH 163/294] Adding back contrib.receptive_field test to oss. PiperOrigin-RevId: 167307311 --- tensorflow/contrib/receptive_field/BUILD | 3 --- tensorflow/contrib/receptive_field/README.md | 3 ++- .../receptive_field/python/util/receptive_field.py | 10 +++++++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/receptive_field/BUILD b/tensorflow/contrib/receptive_field/BUILD index e2d7c07510..ed2f3af08c 100644 --- a/tensorflow/contrib/receptive_field/BUILD +++ b/tensorflow/contrib/receptive_field/BUILD @@ -47,9 +47,6 @@ py_test( name = "receptive_field_test", srcs = ["python/util/receptive_field_test.py"], srcs_version = "PY2AND3", - tags = [ - "no_oss", # see b/65254194 - ], deps = [ ":receptive_field_py", "//tensorflow/contrib/framework:framework_py", diff --git a/tensorflow/contrib/receptive_field/README.md b/tensorflow/contrib/receptive_field/README.md index f7539ec145..b150b903b2 100644 --- a/tensorflow/contrib/receptive_field/README.md +++ b/tensorflow/contrib/receptive_field/README.md @@ -161,4 +161,5 @@ Effective padding (vertical) = 1482 ## Authors -André Araujo (andrefaraujo@) and Mark Sandler +André Araujo (github id: andrefaraujo) and Mark Sandler (github id: +marksandler) diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field.py b/tensorflow/contrib/receptive_field/python/util/receptive_field.py index 4e723829bf..db190a1a41 100644 --- a/tensorflow/contrib/receptive_field/python/util/receptive_field.py +++ b/tensorflow/contrib/receptive_field/python/util/receptive_field.py @@ -35,6 +35,10 @@ _UNCHANGED_RF_LAYER_OPS = [ "VariableV2", "Sub", "Rsqrt", "ConcatV2" ] +# Different ways in which padding modes may be spelled. +_VALID_PADDING = ["VALID", b"VALID"] +_SAME_PADDING = ["SAME", b"SAME"] + def _stride_size(node): """Computes stride size given a TF node. @@ -102,9 +106,9 @@ def _padding_size_conv_pool(node, kernel_size, stride): # depends on input size, we raise an exception. padding_attr = node.attr["padding"] logging.vlog(4, "padding_attr = %s", padding_attr) - if padding_attr.s == "VALID": + if padding_attr.s in _VALID_PADDING: padding = 0 - elif padding_attr.s == "SAME": + elif padding_attr.s in _SAME_PADDING: if kernel_size == 1: padding = 0 elif stride == 1: @@ -118,7 +122,7 @@ def _padding_size_conv_pool(node, kernel_size, stride): "padding may be different depending on the input image " "dimensionality. In this case, alignment check will be skipped.") else: - raise ValueError("Invalid padding operation") + raise ValueError("Invalid padding operation %s" % padding_attr.s) return padding -- GitLab From 1196fbc824b45679cba9fae8daa35cc6c02d3599 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 13:05:00 -0700 Subject: [PATCH 164/294] Use boolean literals where appropriate instead of narrowing ints PiperOrigin-RevId: 167314054 --- .../compiler/xla/service/cpu/parallel_cpu_executable.cc | 2 +- tensorflow/compiler/xla/service/elemental_ir_emitter.cc | 2 +- tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc | 2 +- tensorflow/core/framework/cancellation.cc | 4 +++- tensorflow/core/kernels/debug_ops_test.cc | 3 ++- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc index bef4ecd480..40fa3a67bd 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc @@ -241,7 +241,7 @@ Status Executor::Run() { completion_queue_.pop_front(); break; } - } while (1); + } while (true); TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice, assignment_->GetUniqueTopLevelSlice(instruction)); void* result_buffer = diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index b02138325e..350dbc321f 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -709,7 +709,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeRngElementGenerator( } else { auto r = ir_builder_->CreateSub(q, p); auto leading_zeros = llvm_ir::EmitCallToIntrinsic( - llvm::Intrinsic::ctlz, {r, ir_builder_->getInt1(1)}, + llvm::Intrinsic::ctlz, {r, ir_builder_->getInt1(true)}, {param_ir_type}, ir_builder_); auto in_block = ir_builder_->GetInsertBlock(); diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index d044462f9a..5edaaba3eb 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -334,7 +334,7 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), ir_builder_); IrArray::Index input_index(index.size()); - llvm::Value* in_bounds = ir_builder_->getInt1(1); + llvm::Value* in_bounds = ir_builder_->getInt1(true); for (size_t i = 0; i < index.size(); ++i) { llvm::Value* stridden_index = ir_builder_->CreateNSWMul( index[i], ir_builder_->getInt64(window.dimensions(i).stride())); diff --git a/tensorflow/core/framework/cancellation.cc b/tensorflow/core/framework/cancellation.cc index 1cbed62939..9da4828bba 100644 --- a/tensorflow/core/framework/cancellation.cc +++ b/tensorflow/core/framework/cancellation.cc @@ -23,7 +23,9 @@ namespace tensorflow { const CancellationToken CancellationManager::kInvalidToken = -1; CancellationManager::CancellationManager() - : is_cancelling_(false), is_cancelled_(0), next_cancellation_token_(0) {} + : is_cancelling_(false), + is_cancelled_(false), + next_cancellation_token_(0) {} void CancellationManager::StartCancel() { gtl::FlatMap callbacks_to_run; diff --git a/tensorflow/core/kernels/debug_ops_test.cc b/tensorflow/core/kernels/debug_ops_test.cc index 89bcbc9c37..37c9486594 100644 --- a/tensorflow/core/kernels/debug_ops_test.cc +++ b/tensorflow/core/kernels/debug_ops_test.cc @@ -573,7 +573,8 @@ TEST_F(DebugNumericSummaryOpTest, UInt8Success) { TEST_F(DebugNumericSummaryOpTest, BoolSuccess) { TF_ASSERT_OK(Init(DT_BOOL)); - AddInputFromArray(TensorShape({2, 3}), {0, 0, 1, 1, 1, 0}); + AddInputFromArray(TensorShape({2, 3}), + {false, false, true, true, true, false}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_DOUBLE, TensorShape({16})); -- GitLab From 12cf8c19575e3043ce99d363b57a313fdaabbd76 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 13:06:02 -0700 Subject: [PATCH 165/294] Fix broken link in Estimators. Move Estimators to beginning of Prog. Guide. Change title of Datasets unit. PiperOrigin-RevId: 167314186 --- tensorflow/docs_src/programmers_guide/datasets.md | 4 ++-- tensorflow/docs_src/programmers_guide/estimators.md | 2 +- tensorflow/docs_src/programmers_guide/index.md | 4 ++-- tensorflow/docs_src/programmers_guide/leftnav_files | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index bf3cb5bf19..ba26bd5e94 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -1,4 +1,4 @@ -# Using the `Dataset` API for TensorFlow Input Pipelines +# Importing Data The `Dataset` API enables you to build complex input pipelines from simple, reusable pieces. For example, the pipeline for an image model might @@ -735,7 +735,7 @@ def dataset_input_fn(): return {"image_data": image, "date_time": parsed["date_time"]}, label - # Use `Dataset.map()` to build a pair of a feature dictionary and a label + # Use `Dataset.map()` to build a pair of a feature dictionary and a label # tensor for each example. dataset = dataset.map(parser) dataset = dataset.shuffle(buffer_size=10000) diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md index a5724ea294..755bb049c9 100644 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ b/tensorflow/docs_src/programmers_guide/estimators.md @@ -134,7 +134,7 @@ The heart of every Estimator--whether pre-made or custom--is its evaluation, and prediction. When you are using a pre-made Estimator, someone else has already implemented the model function. When relying on a custom Estimator, you must write the model function yourself. A -${$extend/estimators$companion document) +@{$extend/estimators$companion document} explains how to write the model function. diff --git a/tensorflow/docs_src/programmers_guide/index.md b/tensorflow/docs_src/programmers_guide/index.md index 22fe229422..eef35d6dcc 100644 --- a/tensorflow/docs_src/programmers_guide/index.md +++ b/tensorflow/docs_src/programmers_guide/index.md @@ -4,6 +4,8 @@ The documents in this unit dive into the details of writing TensorFlow code. For TensorFlow 1.3, we revised this document extensively. The units are now as follows: + * @{$programmers_guide/estimators$Estimators}, which introduces a high-level + TensorFlow API that greatly simplifies ML programming. * @{$programmers_guide/tensors$Tensors}, which explains how to create, manipulate, and access Tensors--the fundamental object in TensorFlow. * @{$programmers_guide/variables$Variables}, which details how @@ -18,8 +20,6 @@ The units are now as follows: such as Estimators or Keras, the high-level API creates and manages graphs and sessions for you, but understanding graphs and sessions can still be helpful. - * @{$programmers_guide/estimators$Estimators}, which introduces a high-level - TensorFlow API that greatly simplifies ML programming. * @{$programmers_guide/saved_model$Saving and Restoring}, which explains how to save and restore variables and models. * @{$programmers_guide/datasets$Input Pipelines}, which explains how to diff --git a/tensorflow/docs_src/programmers_guide/leftnav_files b/tensorflow/docs_src/programmers_guide/leftnav_files index 5082e7f36c..0c42f119c9 100644 --- a/tensorflow/docs_src/programmers_guide/leftnav_files +++ b/tensorflow/docs_src/programmers_guide/leftnav_files @@ -1,8 +1,8 @@ index.md +estimators.md tensors.md variables.md graphs.md -estimators.md saved_model.md datasets.md threading_and_queues.md -- GitLab From f7733742d51dba09d4f222b3eb027c27c2c4d130 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 1 Sep 2017 13:16:35 -0700 Subject: [PATCH 166/294] Join an unjoined checked thread in fifo_queue_test. PiperOrigin-RevId: 167315380 --- tensorflow/python/kernel_tests/fifo_queue_test.py | 3 +++ tensorflow/python/kernel_tests/padding_fifo_queue_test.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/python/kernel_tests/fifo_queue_test.py b/tensorflow/python/kernel_tests/fifo_queue_test.py index 85e7b635d8..748135440e 100644 --- a/tensorflow/python/kernel_tests/fifo_queue_test.py +++ b/tensorflow/python/kernel_tests/fifo_queue_test.py @@ -1078,6 +1078,9 @@ class FIFOQueueTest(test.TestCase): self.assertEqual([50.0], dequeued_t.eval()) self.assertEqual([60.0], dequeued_t.eval()) + # Make sure the thread finishes before exiting. + thread.join() + def testBlockingEnqueueBeforeClose(self): with self.test_session() as sess: q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32) diff --git a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py index 53b1897f48..d8c3f9823c 100644 --- a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py +++ b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py @@ -1191,6 +1191,9 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([50.0], dequeued_t.eval()) self.assertEqual([60.0], dequeued_t.eval()) + # Make sure the thread finishes before exiting. + thread.join() + def testBlockingEnqueueBeforeClose(self): with self.test_session() as sess: q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),)) -- GitLab From 0acf5bb38a8f208c6d9f048579a076d5bc6ff0be Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Fri, 1 Sep 2017 13:32:02 -0700 Subject: [PATCH 167/294] Added registry for variants for op ZerosLike. * Updated tf.zeros_like python wrapper to avoid calling tf.zeros on Variants. * tf.zeros_like for variants calls the appropriate callback for the given device and type_name. PiperOrigin-RevId: 167317274 --- .../core/framework/variant_op_registry.cc | 66 ++++++++++-- .../core/framework/variant_op_registry.h | 92 ++++++++++++++++ .../framework/variant_op_registry_test.cc | 100 ++++++++++++++++++ tensorflow/core/kernels/constant_op.cc | 63 +++++++---- tensorflow/core/kernels/shape_op_test.cc | 2 +- tensorflow/core/kernels/shape_ops.h | 2 +- .../python/kernel_tests/constant_op_test.py | 41 ++++++- tensorflow/python/ops/array_ops.py | 7 +- 8 files changed, 340 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/framework/variant_op_registry.cc b/tensorflow/core/framework/variant_op_registry.cc index 11756c356a..9cc7530459 100644 --- a/tensorflow/core/framework/variant_op_registry.cc +++ b/tensorflow/core/framework/variant_op_registry.cc @@ -88,7 +88,17 @@ bool DecodeUnaryVariant(Variant* variant) { if (decode_fn == nullptr) { return false; } - return (*decode_fn)(variant); + const string type_name = variant->TypeName(); + bool decoded = (*decode_fn)(variant); + if (!decoded) return false; + if (variant->TypeName() != type_name) { + LOG(ERROR) << "DecodeUnaryVariant: Variant type_name before decoding was: " + << type_name + << " but after decoding was: " << variant->TypeName() + << ". Treating this as a failure."; + return false; + } + return true; } // Add some basic registrations for use by others, e.g., for testing. @@ -101,15 +111,59 @@ string MaybeRemoveTFPrefix(const StringPiece& str) { } // namespace #define REGISTER_VARIANT_DECODE_TYPE(T) \ - REGISTER_UNARY_VARIANT_DECODE_FUNCTION(T, MaybeRemoveTFPrefix(TF_STR(T))); + REGISTER_UNARY_VARIANT_DECODE_FUNCTION(T, TF_STR(T)); // No encode/decode registered for std::complex<> and Eigen::half // objects yet. -TF_CALL_INTEGRAL_TYPES(REGISTER_VARIANT_DECODE_TYPE); -TF_CALL_float(REGISTER_VARIANT_DECODE_TYPE); -TF_CALL_double(REGISTER_VARIANT_DECODE_TYPE); -TF_CALL_bool(REGISTER_VARIANT_DECODE_TYPE); +REGISTER_VARIANT_DECODE_TYPE(int); +REGISTER_VARIANT_DECODE_TYPE(float); +REGISTER_VARIANT_DECODE_TYPE(bool); +REGISTER_VARIANT_DECODE_TYPE(double); #undef REGISTER_VARIANT_DECODE_TYPE +// Special casing ZerosLikeFn per device. +UnaryVariantOpRegistry::VariantZerosLikeFn* +UnaryVariantOpRegistry::GetZerosLikeFn(const string& device, + const string& type_name) { + auto found = zeros_like_fns.find(std::make_pair(device, type_name)); + if (found == zeros_like_fns.end()) return nullptr; + return &found->second; +} + +void UnaryVariantOpRegistry::RegisterZerosLikeFn( + const string& device, const string& type_name, + const VariantZerosLikeFn& zeros_like_fn) { + CHECK(!type_name.empty()) << "Need a valid name for UnaryVariantZerosLike"; + VariantZerosLikeFn* existing = GetZerosLikeFn(device, type_name); + CHECK_EQ(existing, nullptr) + << "Unary VariantZerosLikeFn for type_name: " << type_name + << " already registered for device type: " << device; + zeros_like_fns.insert( + std::pair, VariantZerosLikeFn>( + std::make_pair(device, type_name), zeros_like_fn)); +} + +namespace { + +template +Status ZerosLikeVariantPrimitiveType(OpKernelContext* ctx, const T& t, + T* t_out) { + *t_out = T(0); + return Status::OK(); +} +} // namespace + +#define REGISTER_VARIANT_ZEROS_LIKE_TYPE(T) \ + REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION( \ + DEVICE_CPU, T, TF_STR(T), ZerosLikeVariantPrimitiveType); + +// No zeros_like registered for std::complex<> or Eigen::half objects yet. +REGISTER_VARIANT_ZEROS_LIKE_TYPE(int); +REGISTER_VARIANT_ZEROS_LIKE_TYPE(float); +REGISTER_VARIANT_ZEROS_LIKE_TYPE(double); +REGISTER_VARIANT_ZEROS_LIKE_TYPE(bool); + +#undef REGISTER_VARIANT_ZEROS_LIKE_TYPE + } // namespace tensorflow diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h index 389b049fa0..37e54f82c0 100644 --- a/tensorflow/core/framework/variant_op_registry.h +++ b/tensorflow/core/framework/variant_op_registry.h @@ -19,11 +19,13 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_encode_decode.h" namespace tensorflow { +class OpKernelContext; // A global UnaryVariantOpRegistry is used to hold callback functions // for different variant types. To be used by ShapeOp, RankOp, and // SizeOp, decoding, etc. @@ -32,6 +34,8 @@ class UnaryVariantOpRegistry { public: typedef std::function VariantShapeFn; typedef std::function VariantDecodeFn; + typedef std::function + VariantZerosLikeFn; // Add a shape lookup function to the registry. void RegisterShapeFn(const string& type_name, const VariantShapeFn& shape_fn); @@ -46,11 +50,29 @@ class UnaryVariantOpRegistry { // Returns nullptr if no decode function was found for the given TypeName. VariantDecodeFn* GetDecodeFn(const string& type_name); + // Add a zeros-like function to the registry. + void RegisterZerosLikeFn(const string& device, const string& type_name, + const VariantZerosLikeFn& zeros_like_fn); + + // Returns nullptr if no zeros-like function was found for the given + // device and TypeName. + VariantZerosLikeFn* GetZerosLikeFn(const string& device, + const string& type_name); + static UnaryVariantOpRegistry* Global(); private: std::unordered_map shape_fns; std::unordered_map decode_fns; + // Map std::pair to function. + struct PairHash { + template + std::size_t operator()(const std::pair& x) const { + return std::hash()(x.first) ^ std::hash()(x.second); + } + }; + std::unordered_map, VariantZerosLikeFn, PairHash> + zeros_like_fns; }; // Gets a TensorShape from a Tensor containing a scalar Variant. @@ -72,6 +94,28 @@ Status GetUnaryVariantShape(const Tensor& variant_tensor, TensorShape* shape); // bool DecodeUnaryVariant(Variant* variant); +// Sets *z_out = zeros_like(v). The variant v must have a registered +// ZerosLike function for the given Device. Returns an Internal error +// if v does not have a registered zeros_like function for this device, or if +// ZerosLike fails. +// +// REQUIRES: +// v_out is not null. +// +template +Status CreateZerosLikeVariant(OpKernelContext* ctx, const Variant& v, + Variant* v_out) { + const string& device = DeviceName::value; + UnaryVariantOpRegistry::VariantZerosLikeFn* zeros_like_fn = + UnaryVariantOpRegistry::Global()->GetZerosLikeFn(device, v.TypeName()); + if (zeros_like_fn == nullptr) { + return errors::Internal( + "No unary variant zeros_like function found for Variant type_name: ", + v.TypeName(), " for device type: ", device); + } + return (*zeros_like_fn)(ctx, v, v_out); +} + namespace variant_op_registry_fn_registration { template @@ -120,6 +164,34 @@ class UnaryVariantDecodeRegistration { } }; +template +class UnaryVariantZerosLikeRegistration { + typedef std::function + LocalVariantZerosLikeFn; + + public: + UnaryVariantZerosLikeRegistration( + const string& device, const string& type_name, + const LocalVariantZerosLikeFn& zeros_like_fn) { + auto wrapped_fn = [type_name, zeros_like_fn](OpKernelContext* ctx, + const Variant& v, + Variant* v_out) -> Status { + CHECK_NOTNULL(v_out); + *v_out = T(); + if (v.get() == nullptr) { + return errors::Internal( + "VariantZerosLikeFn: Could not access object, type_name: ", + type_name); + } + const T& t = *v.get(); + T* t_out = v_out->get(); + return zeros_like_fn(ctx, t, t_out); + }; + UnaryVariantOpRegistry::Global()->RegisterZerosLikeFn(device, type_name, + wrapped_fn); + } +}; + }; // namespace variant_op_registry_fn_registration // Register a unary shape variant function with the signature: @@ -151,6 +223,26 @@ class UnaryVariantDecodeRegistration { T> \ register_unary_variant_op_decoder_fn_##ctr(type_name) +// Register a unary zeros_like variant function with the signature: +// Status ZerosLikeFn(OpKernelContext* ctx, const T& t, T* t_out); +// to Variants having TypeName type_name, for device string device. +#define REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION(device, T, type_name, \ + zeros_like_function) \ + REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION_UNIQ_HELPER( \ + __COUNTER__, device, T, type_name, zeros_like_function) + +#define REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION_UNIQ_HELPER( \ + ctr, device, T, type_name, zeros_like_function) \ + REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION_UNIQ(ctr, device, T, type_name, \ + zeros_like_function) + +#define REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION_UNIQ( \ + ctr, device, T, type_name, zeros_like_function) \ + static variant_op_registry_fn_registration:: \ + UnaryVariantZerosLikeRegistration \ + register_unary_variant_op_decoder_fn_##ctr(device, type_name, \ + zeros_like_function) + } // end namespace tensorflow #endif // TENSORFLOW_FRAMEWORK_VARIANT_OP_REGISTRY_H_ diff --git a/tensorflow/core/framework/variant_op_registry_test.cc b/tensorflow/core/framework/variant_op_registry_test.cc index 86fef53dbe..4e79180217 100644 --- a/tensorflow/core/framework/variant_op_registry_test.cc +++ b/tensorflow/core/framework/variant_op_registry_test.cc @@ -15,13 +15,25 @@ limitations under the License. #include +#define EIGEN_USE_THREADS + +#if GOOGLE_CUDA +#define EIGEN_USE_GPU +#endif + #include "tensorflow/core/framework/variant_op_registry.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + namespace { struct VariantValue { @@ -33,7 +45,24 @@ struct VariantValue { *s = TensorShape({-0xdeadbeef}); return Status::OK(); } + static Status CPUZerosLikeFn(OpKernelContext* ctx, const VariantValue& v, + VariantValue* v_out) { + if (v.early_exit) { + return errors::InvalidArgument("early exit zeros_like!"); + } + v_out->zeros_like_set = 1; // CPU + return Status::OK(); + } + static Status GPUZerosLikeFn(OpKernelContext* ctx, const VariantValue& v, + VariantValue* v_out) { + if (v.early_exit) { + return errors::InvalidArgument("early exit zeros_like!"); + } + v_out->zeros_like_set = 2; // GPU + return Status::OK(); + } bool early_exit; + int zeros_like_set; }; REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(VariantValue, "TEST VariantValue", @@ -41,6 +70,14 @@ REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(VariantValue, "TEST VariantValue", REGISTER_UNARY_VARIANT_DECODE_FUNCTION(VariantValue, "TEST VariantValue"); +REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION(DEVICE_CPU, VariantValue, + "TEST VariantValue", + VariantValue::CPUZerosLikeFn); + +REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION(DEVICE_GPU, VariantValue, + "TEST VariantValue", + VariantValue::GPUZerosLikeFn); + } // namespace TEST(VariantOpShapeRegistryTest, TestBasic) { @@ -101,4 +138,67 @@ TEST(VariantOpDecodeRegistryTest, TestDuplicate) { "fjfjfj already registered"); } +TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) { + EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetZerosLikeFn( + DEVICE_CPU, "YOU SHALL NOT PASS"), + nullptr); + + VariantValue vv_early_exit{true /* early_exit */, 0 /* zeros_like_set */}; + Variant v = vv_early_exit; + Variant v_out = VariantValue(); + + OpKernelContext* null_context_pointer = nullptr; + Status s0 = + CreateZerosLikeVariant(null_context_pointer, v, &v_out); + EXPECT_FALSE(s0.ok()); + EXPECT_TRUE( + StringPiece(s0.error_message()).contains("early exit zeros_like")); + + VariantValue vv_ok{false /* early_exit */, 0 /* zeros_like_set */}; + v = vv_ok; + TF_EXPECT_OK( + CreateZerosLikeVariant(null_context_pointer, v, &v_out)); + VariantValue* vv_out = CHECK_NOTNULL(v_out.get()); + EXPECT_EQ(vv_out->zeros_like_set, 1); // CPU +} + +#if GOOGLE_CUDA +TEST(VariantOpZerosLikeRegistryTest, TestBasicGPU) { + EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetZerosLikeFn( + DEVICE_GPU, "YOU SHALL NOT PASS"), + nullptr); + + VariantValue vv_early_exit{true /* early_exit */, 0 /* zeros_like_set */}; + Variant v = vv_early_exit; + Variant v_out = VariantValue(); + + OpKernelContext* null_context_pointer = nullptr; + Status s0 = + CreateZerosLikeVariant(null_context_pointer, v, &v_out); + EXPECT_FALSE(s0.ok()); + EXPECT_TRUE( + StringPiece(s0.error_message()).contains("early exit zeros_like")); + + VariantValue vv_ok{false /* early_exit */, 0 /* zeros_like_set */}; + v = vv_ok; + TF_EXPECT_OK( + CreateZerosLikeVariant(null_context_pointer, v, &v_out)); + VariantValue* vv_out = CHECK_NOTNULL(v_out.get()); + EXPECT_EQ(vv_out->zeros_like_set, 2); // GPU +} +#endif // GOOGLE_CUDA + +TEST(VariantOpZerosLikeRegistryTest, TestDuplicate) { + UnaryVariantOpRegistry registry; + UnaryVariantOpRegistry::VariantZerosLikeFn f; + + registry.RegisterZerosLikeFn(DEVICE_CPU, "fjfjfj", f); + EXPECT_DEATH(registry.RegisterZerosLikeFn(DEVICE_CPU, "fjfjfj", f), + "fjfjfj already registered"); + + registry.RegisterZerosLikeFn(DEVICE_GPU, "fjfjfj", f); + EXPECT_DEATH(registry.RegisterZerosLikeFn(DEVICE_GPU, "fjfjfj", f), + "fjfjfj already registered"); +} + } // namespace tensorflow diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index b4b37dd4b8..cdc1145282 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -17,6 +17,10 @@ limitations under the License. #define EIGEN_USE_THREADS +#if GOOGLE_CUDA +#define EIGEN_USE_GPU +#endif + #include "tensorflow/core/kernels/constant_op.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" @@ -26,13 +30,14 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/variant_op_registry.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/platform/macros.h" #ifdef TENSORFLOW_USE_SYCL #include "tensorflow/core/common_runtime/sycl/sycl_util.h" -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL namespace tensorflow { @@ -40,9 +45,8 @@ ConstantOp::ConstantOp(OpKernelConstruction* ctx) : OpKernel(ctx), tensor_(ctx->output_type(0)) { const TensorProto* proto = nullptr; OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto)); - OP_REQUIRES_OK(ctx, - ctx->device()->MakeTensorFromProto( - *proto, AllocatorAttributes(), &tensor_)); + OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto( + *proto, AllocatorAttributes(), &tensor_)); OP_REQUIRES( ctx, ctx->output_type(0) == tensor_.dtype(), errors::InvalidArgument("Type mismatch between value (", @@ -85,9 +89,9 @@ REGISTER_KERNEL(GPU, bool); #endif #ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(D, TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Const").Device(DEVICE_##D).TypeConstraint("dtype"), \ +#define REGISTER_SYCL_KERNEL(D, TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Const").Device(DEVICE_##D).TypeConstraint("dtype"), \ ConstantOp); REGISTER_SYCL_KERNEL(SYCL, float); REGISTER_SYCL_KERNEL(SYCL, double); @@ -194,18 +198,18 @@ struct FillFunctor { void operator()(const SYCLDevice& d, typename TTypes::Flat out, typename TTypes::ConstScalar in) { #if !defined(EIGEN_HAS_INDEX_LIST) - Eigen::array rank1{1}; + Eigen::array rank1{1}; #else - Eigen::IndexList> rank1; + Eigen::IndexList > rank1; #endif - const int size = out.dimension(0); - Eigen::array broadcast_dims{size}; + const int size = out.dimension(0); + Eigen::array broadcast_dims{size}; - To32Bit(out).device(d) = in.reshape(rank1).broadcast(broadcast_dims); + To32Bit(out).device(d) = in.reshape(rank1).broadcast(broadcast_dims); } }; -} -#endif // TENSORFLOW_USE_SYCL +} // namespace functor +#endif // TENSORFLOW_USE_SYCL #define REGISTER_KERNEL(D, TYPE) \ REGISTER_KERNEL_BUILDER(Name("Fill") \ @@ -273,11 +277,23 @@ class ZerosLikeOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); - Tensor* out = nullptr; - OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output( - {0}, 0, input.shape(), &out)); - functor::SetZeroFunctor f; - f(ctx->eigen_device(), out->flat()); + const Device& d = ctx->eigen_device(); + if (std::is_same::value) { + OP_REQUIRES(ctx, input.dims() == 0, + errors::InvalidArgument( + "ZerosLike of non-unary Variant not supported.")); + const Variant& v = input.scalar()(); + Tensor out(cpu_allocator(), DT_VARIANT, TensorShape({})); + Variant* out_v = &(out.scalar()()); + OP_REQUIRES_OK(ctx, CreateZerosLikeVariant(ctx, v, out_v)); + ctx->set_output(0, out); + } else { + Tensor* out = nullptr; + OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output( + {0}, 0, input.shape(), &out)); + functor::SetZeroFunctor f; + f(d, out->flat()); + } } }; @@ -288,6 +304,7 @@ class ZerosLikeOp : public OpKernel { #define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU) TF_CALL_POD_STRING_TYPES(REGISTER_CPU); +REGISTER_CPU(Variant); #undef REGISTER_CPU #ifdef TENSORFLOW_USE_SYCL @@ -315,6 +332,14 @@ REGISTER_KERNEL_BUILDER(Name("ZerosLike") .TypeConstraint("T") .HostMemory("y"), ZerosLikeOp); +// TODO(ebrevdo): Once rendezvous has been properly set up for +// Variants, we'll no longer need a HostMemory attribute for this case. +REGISTER_KERNEL_BUILDER(Name("ZerosLike") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("x") + .HostMemory("y"), + ZerosLikeOp); #endif // GOOGLE_CUDA #undef REGISTER_KERNEL diff --git a/tensorflow/core/kernels/shape_op_test.cc b/tensorflow/core/kernels/shape_op_test.cc index a305598fe2..96eaa4ac75 100644 --- a/tensorflow/core/kernels/shape_op_test.cc +++ b/tensorflow/core/kernels/shape_op_test.cc @@ -101,7 +101,7 @@ TEST_F(ShapeOpTest, Simple) { Tensor variant_tensor(DT_VARIANT, TensorShape({1})); Status s = session.Run({{input, variant_tensor}}, {shape_output}, &outputs); EXPECT_FALSE(s.ok()); - ExpectHasError(s, "Shape of non-scalar Variant not supported."); + ExpectHasError(s, "Shape of non-unary Variant not supported."); } { diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h index 0c39d46aea..ac607f4e8b 100644 --- a/tensorflow/core/kernels/shape_ops.h +++ b/tensorflow/core/kernels/shape_ops.h @@ -35,7 +35,7 @@ inline Status GetRegularOrVariantShape(OpKernelContext* ctx, int input_index, if (ctx->input_dtype(0) == DT_VARIANT) { if (inp.dims() != 0) { return errors::InvalidArgument( - "Shape of non-scalar Variant not supported."); + "Shape of non-unary Variant not supported."); } TF_RETURN_IF_ERROR(GetUnaryVariantShape(inp, shape)); } else { diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index df413939c7..6167cb9999 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -119,11 +120,11 @@ class ConstantTest(test.TestCase): variant_val=[ tensor_pb2.VariantTensorDataProto( # Match registration in variant_op_registry.cc - type_name=b"int32", + type_name=b"int", metadata=np.array(1, dtype=np.int32).tobytes()) ]) - const_op = constant_op.constant(variant_tensor).op - const_value = const_op.get_attr("value") + const = constant_op.constant(variant_tensor) + const_value = const.op.get_attr("value") # Ensure we stored the tensor proto properly. self.assertProtoEquals(variant_tensor, const_value) @@ -134,7 +135,10 @@ class ConstantTest(test.TestCase): # native numpy types cannot be passed to ops.convert_to_tensor. # TODO(ebrevdo): Add registration mechanism for # ops.convert_to_tensor and for session.run output. - const_op.run() + logging_const_op = logging_ops.Print( + const, [const], + message="Variant storing an int, decoded const value:").op + logging_const_op.run() def testStringWithNulls(self): with self.test_session(): @@ -469,6 +473,35 @@ class ZerosLikeTest(test.TestCase): self.assertEqual(y.shape, shape) self.assertAllEqual(y, np.zeros(shape, dtype=out_type)) + def testZerosLikeVariant(self): + # TODO(ebrevdo): Re-enable use_gpu=True once non-DMA Variant + # copying between CPU and GPU is supported AND we register a + # ZerosLike callback for GPU for Variant storing primitive types + # in variant_op_registry.cc. + with self.test_session(use_gpu=False): + variant_tensor = tensor_pb2.TensorProto( + dtype=dtypes_lib.variant.as_datatype_enum, + tensor_shape=tensor_shape.TensorShape([]).as_proto(), + variant_val=[ + tensor_pb2.VariantTensorDataProto( + # Match registration in variant_op_registry.cc + type_name=b"int", + metadata=np.array(1, dtype=np.int32).tobytes()) + ]) + const_variant = constant_op.constant(variant_tensor) + zeros_like = array_ops.zeros_like(const_variant) + zeros_like_op = logging_ops.Print( + zeros_like, [const_variant, zeros_like], + message="Variant storing an int, input and output of zeros_like:").op + + # Smoke test -- ensure this executes without trouble. + # Right now, non-numpy-compatible objects cannot be returned from a + # session.run call; similarly, objects that can't be converted to + # native numpy types cannot be passed to ops.convert_to_tensor. + # TODO(ebrevdo): Add registration mechanism for + # ops.convert_to_tensor and for session.run output. + zeros_like_op.run() + class OnesTest(test.TestCase): diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 2b9306e874..33ba5df7a6 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1466,12 +1466,15 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): with ops.name_scope(name, "zeros_like", [tensor]) as name: tensor = ops.convert_to_tensor(tensor, name="tensor") - if tensor.shape.is_fully_defined(): + # For now, variant types must be created via zeros_like; as we need to + # pass the input variant object to the proper zeros callback. + + if tensor.shape.is_fully_defined() and tensor.dtype != dtypes.variant: # We can produce a zeros tensor independent of the value of 'tensor', # since the shape is known statically. return zeros(tensor.shape, dtype=dtype or tensor.dtype, name=name) - if dtype is not None and dtype != tensor.dtype: + if dtype is not None and dtype != tensor.dtype and dtype != dtypes.variant: return zeros( shape_internal(tensor, optimize=optimize), dtype=dtype, name=name) else: -- GitLab From 6d6118f14f74ec7d1ac3b4a9f0a7493624ef4caa Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 1 Sep 2017 13:41:13 -0700 Subject: [PATCH 168/294] Change global step to int64. PiperOrigin-RevId: 167318475 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 6748a76562..0578ae8b0f 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -68,12 +68,11 @@ def _create_global_step(graph): return variable_scope.get_variable( ops.GraphKeys.GLOBAL_STEP, shape=[], - dtype=dtypes.int32, + dtype=dtypes.int64, initializer=init_ops.zeros_initializer(), trainable=False, use_resource=True, - collections=[ops.GraphKeys.GLOBAL_VARIABLES, - ops.GraphKeys.GLOBAL_STEP]) + collections=[ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.GLOBAL_STEP]) def _sync_variables_ops(): -- GitLab From b897ae084a9833f88ab5619bdf8940735b7d0de5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 13:41:24 -0700 Subject: [PATCH 169/294] Fixing typo. PiperOrigin-RevId: 167318502 --- tensorflow/python/ops/variable_scope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 9093c12968..645775239f 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1698,7 +1698,7 @@ def variable_scope(name_or_scope, use when doing asynchronous distributed training. Returns: - A scope that can be to captured and reused. + A scope that can be captured and reused. Raises: ValueError: when trying to reuse within a create scope, or create within -- GitLab From 2e7fdf03319725d3e5c62c20238f49dccb474dc5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 13:47:23 -0700 Subject: [PATCH 170/294] Fix convert_to_tensor problem for list of TensorNodes Current code fail to error out in convert_to_tensor when multiple TensorNodes are passed in. PiperOrigin-RevId: 167319274 --- tensorflow/python/eager/backprop_test.py | 14 +++++++++++++ tensorflow/python/framework/ops.py | 20 +++++++++++++------ .../python/kernel_tests/array_ops_test.py | 16 +++++++-------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 429eeabb42..b437905509 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -307,6 +307,20 @@ class BackpropTest(test.TestCase): [tensor_shape.TensorShape(s).as_proto() for s in shape_list], backprop.make_attr([pywrap_tensorflow.TF_ATTR_SHAPE], shape_list)) + def testMultiValueConvertToTensor(self): + x = resource_variable_ops.ResourceVariable( + initial_value=array_ops.constant([1.0]), name='x') + + def fn(): + tape.watch_variable(x) + a = math_ops.add(x.value(), 1.0) + # Make sure convert_to_tensor works correctly with list of TensorNodes. + b = array_ops.stack([a, a], axis=0) + return math_ops.reduce_mean(b) + + grad = backprop.implicit_grad(fn)()[0][1] + self.assertAllEqual([1.0], grad.numpy()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 659bc394b9..b197e96886 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -49,6 +49,7 @@ from tensorflow.python.framework import versions from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import compat from tensorflow.python.util import decorator_utils +from tensorflow.python.util import nest from tensorflow.python.util import tf_contextlib # Temporary global switch determining if we should enable the work-in-progress @@ -1036,12 +1037,19 @@ def internal_convert_to_tensor(value, # tracing gradients, to ensure the same behavior happens with and without # tracing. unwrapped = ag_core.getval(value) - # Fast path for EagerTensors that don't need any conversion. - if isinstance(unwrapped, EagerTensor) and context.in_eager_mode(): - # Note that we don't check that value's dtype matches the dtype - # argument. We exepct that the C runtime will do that checking - # when we execute the kernel. - return value + + if context.in_eager_mode(): + # Fast path for EagerTensors that don't need any conversion. + if isinstance(unwrapped, EagerTensor): + # Note that we don't check that value's dtype matches the dtype + # argument. We exepct that the C runtime will do that checking + # when we execute the kernel. + return value + values = nest.flatten(value) + if (len(values) > 1 and + any(isinstance(ag_core.getval(v), EagerTensor) for v in values)): + raise TypeError("Cannot convert to a eager tensor.") + if dtype is not None: dtype = dtypes.as_dtype(dtype) unwrapped_type = type(unwrapped) diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 392639fa17..77c5bb6d40 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -981,15 +981,15 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): class ConcatSliceResourceTest(test_util.TensorFlowTestCase): + @test_util.run_in_graph_and_eager_modes() def testConcatSlice(self): - with self.test_session(): - r1 = test_ops.stub_resource_handle_op(container="a", shared_name="b") - r2 = test_ops.stub_resource_handle_op(container="a", shared_name="c") - c = array_ops.stack([r1, r2]) - s = array_ops.strided_slice(c, [1], [2]) - test_ops.resource_create_op(s).run() - with self.assertRaises(errors.AlreadyExistsError): - test_ops.resource_create_op(r2).run() + r1 = test_ops.stub_resource_handle_op(container="a", shared_name="b") + r2 = test_ops.stub_resource_handle_op(container="a", shared_name="c") + c = array_ops.stack([r1, r2]) + s = array_ops.strided_slice(c, [1], [2]) + self.evaluate(test_ops.resource_create_op(s)) + with self.assertRaises(errors.AlreadyExistsError): + self.evaluate(test_ops.resource_create_op(r2)) class IdentityTest(test_util.TensorFlowTestCase): -- GitLab From 390dcd29ac75ba59a3ff6ab45ab58f80d4094dc2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 13:54:05 -0700 Subject: [PATCH 171/294] Change bfloat constructor to accept a float to avoid truncation in implicit conversion from non-integer types to uint16_t. PiperOrigin-RevId: 167320150 --- tensorflow/core/framework/bfloat16_test.cc | 3 ++- tensorflow/core/framework/numeric_types.h | 9 ++++++++- tensorflow/core/kernels/cast_op.h | 9 +-------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index 5bd95b806f..af4e6a4411 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -23,7 +23,8 @@ namespace { TEST(Bfloat16Test, Simple) { bfloat16 a(12); - EXPECT_EQ(12, a.value); + // Floating point representation of 12: 0x41400000 + EXPECT_EQ(0x4140, a.value); } TEST(Bfloat16Test, Conversion) { diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h index 31b88707e2..a630bee38d 100644 --- a/tensorflow/core/framework/numeric_types.h +++ b/tensorflow/core/framework/numeric_types.h @@ -44,7 +44,14 @@ typedef Eigen::QUInt16 quint16; // see framework/bfloat16.h for description. struct bfloat16 { EIGEN_DEVICE_FUNC bfloat16() {} - EIGEN_DEVICE_FUNC explicit bfloat16(const uint16_t v) : value(v) {} + EIGEN_DEVICE_FUNC explicit bfloat16(const float v) { + const uint16_t* p = reinterpret_cast(&v); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + value = p[0]; +#else + value = p[1]; +#endif + } uint16_t value; }; diff --git a/tensorflow/core/kernels/cast_op.h b/tensorflow/core/kernels/cast_op.h index 5c24f164a4..59a991b5a8 100644 --- a/tensorflow/core/kernels/cast_op.h +++ b/tensorflow/core/kernels/cast_op.h @@ -121,14 +121,7 @@ struct scalar_cast_op { typedef ::tensorflow::bfloat16 result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ::tensorflow::bfloat16 operator()( const float a) const { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - const uint16_t* p = reinterpret_cast(&a); - return ::tensorflow::bfloat16(p[0]); -#else - static_assert(::tensorflow::port::kLittleEndian, "Not a little endian system!"); - const uint16_t* p = reinterpret_cast(&a); - return ::tensorflow::bfloat16(p[1]); -#endif + return ::tensorflow::bfloat16(a); } }; -- GitLab From ef1286ea349d362e8a214f0e98951586791e0719 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 13:56:27 -0700 Subject: [PATCH 172/294] Internal change PiperOrigin-RevId: 167320434 --- tensorflow/core/BUILD | 10 ++++++++++ tensorflow/core/platform/default/build_config.bzl | 3 +++ 2 files changed, 13 insertions(+) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index cd083abb50..d01f7843ee 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -122,6 +122,7 @@ load( "tf_additional_gpu_tracer_cuda_deps", "tf_pyclif_proto_library", "tf_jspb_proto_library", + "tf_nano_proto_library", ) load( "//tensorflow/core:platform/default/build_config_root.bzl", @@ -212,6 +213,15 @@ tf_jspb_proto_library( deps = [":protos_all_cc"], ) +tf_nano_proto_library( + name = "protos_all_nano_proto", + field_style = "accessors", + generate_equals = 1, + generate_intdefs = 1, + visibility = ["//visibility:public"], + deps = [":protos_all_cc"], +) + exports_files([ "framework/types.proto", ]) diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 126558cac3..e1ad66c387 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -75,6 +75,9 @@ def tf_proto_library_py(name, srcs=[], protodeps=[], deps=[], visibility=[], def tf_jspb_proto_library(**kwargs): pass +def tf_nano_proto_library(**kwargs): + pass + def tf_proto_library(name, srcs = [], has_services = None, protodeps = [], visibility = [], testonly = 0, cc_libs = [], -- GitLab From fe8905ed992e29467bcf69053dfce07b77f906d3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 14:42:46 -0700 Subject: [PATCH 173/294] Fix issue where some pooling op gradients on the CPU would fail when strides > ksize. RELNOTES: Enable support for strides > ksize for pooling operations. PiperOrigin-RevId: 167327007 --- tensorflow/core/kernels/ops_util.cc | 5 -- tensorflow/core/kernels/ops_util_test.cc | 38 ++++++++++-- .../kernel_tests/pooling_ops_3d_test.py | 36 ++++++++++++ .../python/kernel_tests/pooling_ops_test.py | 58 +++++++++++++++++++ 4 files changed, 126 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/ops_util.cc b/tensorflow/core/kernels/ops_util.cc index 130939263b..efacd05dd3 100644 --- a/tensorflow/core/kernels/ops_util.cc +++ b/tensorflow/core/kernels/ops_util.cc @@ -37,11 +37,6 @@ Eigen::PaddingType BrainPadding2EigenPadding(Padding padding) { Status GetBroadcastSize(const int index, const int in_size, const int ksize, const int stride, const int pad_size, int* bindex, int* bsize) { - // Cannot have strides larger than the patch size. - if (stride > ksize) { - return errors::InvalidArgument( - "stride must be less than or equal to kernel size"); - } // Cannot have index beyond the input size. if (index * stride > in_size) { return errors::InvalidArgument( diff --git a/tensorflow/core/kernels/ops_util_test.cc b/tensorflow/core/kernels/ops_util_test.cc index 42ffef6735..9d53882dee 100644 --- a/tensorflow/core/kernels/ops_util_test.cc +++ b/tensorflow/core/kernels/ops_util_test.cc @@ -173,12 +173,6 @@ TEST_F(OpsUtilTest, Get2dOutputSizeVerbose) { VerifyGet2dOutputVerboseSizeValues(pad_struct2, error::OK); } -// Test stride > ksize fails with INVALID_ARGUMENT. -TEST_F(OpsUtilTest, GetBroadcastTest3_1_2_0) { - bcast_struct bcast = {{0, 3, 1, 2, 0}, {0, 3}}; - VerifyBoundaries(bcast, error::INVALID_ARGUMENT); -} - // Test index * stride > in_size fails with INVALID_ARGUMENT. TEST_F(OpsUtilTest, GetBroadcastTestBadIndex) { bcast_struct bcast = {{2, 3, 1, 2, 0}, {0, 3}}; @@ -281,6 +275,38 @@ TEST_F(OpsUtilTest, GetBroadcastTest3_3_3_2) { } } +// in_size = 3, ksize = 1, stride = 2, pad_size = 0 +TEST_F(OpsUtilTest, GetBroadcastTest3_1_2_0) { + bcast_struct bcast[] = { + {{0, 3, 1, 2, 0}, {0, 1}}, + {{1, 3, 1, 2, 0}, {2, 1}}, + }; + for (size_t i = 0; i < sizeof(bcast) / sizeof(bcast[0]); ++i) { + VerifyBcastValues(bcast[i]); + } +} + +// in_size = 3, ksize = 2, stride = 3, pad_size = 0 +TEST_F(OpsUtilTest, GetBroadcastTest3_2_3_0) { + bcast_struct bcast[] = { + {{0, 3, 2, 3, 0}, {0, 2}}, + }; + for (size_t i = 0; i < sizeof(bcast) / sizeof(bcast[0]); ++i) { + VerifyBcastValues(bcast[i]); + } +} + +// in_size = 3, ksize = 2, stride = 3, pad_size = 1 +TEST_F(OpsUtilTest, GetBroadcastTest3_2_3_1) { + bcast_struct bcast[] = { + {{0, 3, 2, 3, 1}, {0, 1}}, + {{1, 3, 2, 3, 1}, {2, 1}}, + }; + for (size_t i = 0; i < sizeof(bcast) / sizeof(bcast[0]); ++i) { + VerifyBcastValues(bcast[i]); + } +} + TEST_F(OpsUtilTest, SanitizeThreadSuffix) { EXPECT_EQ("_aBc123_-___", SanitizeThreadSuffix("/aBc123_- /")); } diff --git a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py index fa1553a3f6..b01fc12953 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py @@ -321,6 +321,15 @@ class PoolingTest(test.TestCase): strides=(1, 1, 1), padding="VALID") + def testMaxPoolGradValidPadding1_2_3d(self): + self._ConstructAndTestGradient( + nn_ops.max_pool3d, + input_sizes=[1, 3, 3, 3, 1], + output_sizes=[1, 2, 2, 2, 1], + window=(1, 1, 1), + strides=(2, 2, 2), + padding="VALID") + def testMaxPoolGradValidPadding2_2_3d(self): self._ConstructAndTestGradient( nn_ops.max_pool3d, @@ -339,6 +348,15 @@ class PoolingTest(test.TestCase): strides=(1, 1, 1), padding="SAME") + def testMaxPoolGradSamePadding1_2_3d(self): + self._ConstructAndTestGradient( + nn_ops.max_pool3d, + input_sizes=[1, 3, 2, 4, 1], + output_sizes=[1, 2, 1, 2, 1], + window=(1, 1, 1), + strides=(2, 2, 2), + padding="SAME") + def testMaxPoolGradSamePadding2_1_3d(self): self._ConstructAndTestGradient( nn_ops.max_pool3d, @@ -375,6 +393,15 @@ class PoolingTest(test.TestCase): strides=(1, 1, 1), padding="VALID") + def testAvgPoolGradValidPadding1_2_3d(self): + self._ConstructAndTestGradient( + nn_ops.avg_pool3d, + input_sizes=[1, 3, 3, 3, 1], + output_sizes=[1, 2, 2, 2, 1], + window=(1, 1, 1), + strides=(2, 2, 2), + padding="VALID") + def testAvgPoolGradValidPadding2_1_3d(self): self._ConstructAndTestGradient( nn_ops.avg_pool3d, @@ -402,6 +429,15 @@ class PoolingTest(test.TestCase): strides=(1, 1, 1), padding="SAME") + def testAvgPoolGradSamePadding1_2_3d(self): + self._ConstructAndTestGradient( + nn_ops.avg_pool3d, + input_sizes=[1, 3, 2, 4, 2], + output_sizes=[1, 2, 1, 2, 2], + window=(1, 1, 1), + strides=(2, 2, 2), + padding="SAME") + def testAvgPoolGradSamePadding2_1_3d(self): self._ConstructAndTestGradient( nn_ops.avg_pool3d, diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index da14871c87..9eb1fea803 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -998,6 +998,20 @@ class PoolingTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + def _testMaxPoolGradValidPadding1_2(self, data_format, use_gpu): + for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + self._ConstructAndTestGradient( + pool_func, + input_sizes=[1, 3, 3, 1], + output_sizes=[1, 2, 2, 1], + window_rows=1, + window_cols=1, + row_stride=2, + col_stride=2, + padding="VALID", + data_format=data_format, + use_gpu=use_gpu) + def _testMaxPoolGradValidPadding2_2(self, data_format, use_gpu): for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( @@ -1026,6 +1040,20 @@ class PoolingTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + def _testMaxPoolGradSamePadding1_2(self, data_format, use_gpu): + for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + self._ConstructAndTestGradient( + pool_func, + input_sizes=[2, 2, 4, 3], + output_sizes=[2, 1, 2, 3], + window_rows=1, + window_cols=1, + row_stride=2, + col_stride=2, + padding="SAME", + data_format=data_format, + use_gpu=use_gpu) + def _testMaxPoolGradSamePadding2_1(self, data_format, use_gpu): for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( @@ -1071,10 +1099,12 @@ class PoolingTest(test.TestCase): def testMaxPoolGrad(self): for (data_format, use_gpu) in GetTestConfigs(): self._testMaxPoolGradValidPadding1_1(data_format, use_gpu) + self._testMaxPoolGradValidPadding1_2(data_format, use_gpu) self._testMaxPoolGradValidPadding2_1_6(data_format, use_gpu) self._testMaxPoolGradValidPadding2_1_7(data_format, use_gpu) self._testMaxPoolGradValidPadding2_2(data_format, use_gpu) self._testMaxPoolGradSamePadding1_1(data_format, use_gpu) + self._testMaxPoolGradSamePadding1_2(data_format, use_gpu) self._testMaxPoolGradSamePadding2_1(data_format, use_gpu) self._testMaxPoolGradSamePadding2_2(data_format, use_gpu) self._testMaxPoolGradSamePadding3_1(data_format, use_gpu) @@ -1497,9 +1527,11 @@ class PoolingTest(test.TestCase): def testAvgPoolGrad(self): for (data_format, use_gpu) in GetTestConfigs(): self._testAvgPoolGradValidPadding1_1(data_format, use_gpu) + self._testAvgPoolGradValidPadding1_2(data_format, use_gpu) self._testAvgPoolGradValidPadding2_1(data_format, use_gpu) self._testAvgPoolGradValidPadding2_2(data_format, use_gpu) self._testAvgPoolGradSamePadding1_1(data_format, use_gpu) + self._testAvgPoolGradSamePadding1_2(data_format, use_gpu) self._testAvgPoolGradSamePadding2_1(data_format, use_gpu) self._testAvgPoolGradSamePadding2_2(data_format, use_gpu) self._testAvgPoolGradSamePadding3_1(data_format, use_gpu) @@ -1517,6 +1549,19 @@ class PoolingTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + def _testAvgPoolGradValidPadding1_2(self, data_format, use_gpu): + self._ConstructAndTestGradient( + nn_ops.avg_pool, + input_sizes=[2, 3, 3, 3], + output_sizes=[2, 2, 2, 3], + window_rows=1, + window_cols=1, + row_stride=2, + col_stride=2, + padding="VALID", + data_format=data_format, + use_gpu=use_gpu) + def _testAvgPoolGradValidPadding2_1(self, data_format, use_gpu): self._ConstructAndTestGradient( nn_ops.avg_pool, @@ -1556,6 +1601,19 @@ class PoolingTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + def _testAvgPoolGradSamePadding1_2(self, data_format, use_gpu): + self._ConstructAndTestGradient( + nn_ops.avg_pool, + input_sizes=[2, 2, 4, 3], + output_sizes=[2, 1, 2, 3], + window_rows=1, + window_cols=1, + row_stride=2, + col_stride=2, + padding="SAME", + data_format=data_format, + use_gpu=use_gpu) + def _testAvgPoolGradSamePadding2_1(self, data_format, use_gpu): self._ConstructAndTestGradient( nn_ops.avg_pool, -- GitLab From c75f0ffb80702c4937636ebb8df769d6f0897797 Mon Sep 17 00:00:00 2001 From: Mingxing Tan Date: Fri, 1 Sep 2017 14:59:36 -0700 Subject: [PATCH 174/294] Enable partial JPEG decompression. PiperOrigin-RevId: 167329034 --- tensorflow/core/lib/jpeg/jpeg_mem.cc | 229 +++++++++++++++--- tensorflow/core/lib/jpeg/jpeg_mem.h | 11 + tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc | 190 ++++++++++++++- 3 files changed, 390 insertions(+), 40 deletions(-) diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.cc b/tensorflow/core/lib/jpeg/jpeg_mem.cc index 258793aa1e..3c7e5ca696 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem.cc @@ -70,13 +70,24 @@ class FewerArgsForCompiler { int stride_; }; +// Check whether the crop window is valid, assuming crop is true. +bool IsCropWindowValid(const UncompressFlags& flags, int input_image_width, + int input_image_height) { + // Crop window is valid only if it is non zero and all the window region is + // within the original image. + return flags.crop_width > 0 && flags.crop_height > 0 && flags.crop_x >= 0 && + flags.crop_y >= 0 && + flags.crop_y + flags.crop_height <= input_image_height && + flags.crop_x + flags.crop_width <= input_image_width; +} + uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { // unpack the argball const int datasize = argball->datasize_; const auto& flags = argball->flags_; const int ratio = flags.ratio; int components = flags.components; - int stride = flags.stride; // may be 0 + int stride = flags.stride; // may be 0 int64* const nwarn = argball->pnwarn_; // may be NULL // Can't decode if the ratio is not recognized by libjpeg @@ -159,8 +170,43 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { return nullptr; } + JDIMENSION target_output_width = cinfo.output_width; + JDIMENSION target_output_height = cinfo.output_height; + JDIMENSION skipped_scanlines = 0; +#if !defined(WIN32) + if (flags.crop) { + // Update target output height and width based on crop window. + target_output_height = flags.crop_height; + target_output_width = flags.crop_width; + + // So far, cinfo holds the original input image information. + if (!IsCropWindowValid(flags, cinfo.output_width, cinfo.output_height)) { + LOG(ERROR) << "Invalid crop window: x=" << flags.crop_x + << ", y=" << flags.crop_y << ", w=" << target_output_width + << ", h=" << target_output_height + << " for image_width: " << cinfo.output_width + << " and image_height: " << cinfo.output_height; + jpeg_destroy_decompress(&cinfo); + return nullptr; + } + + // Update cinfo.output_width. It is tricky that cinfo.output_width must + // fall on an Minimum Coded Unit (MCU) boundary; if it doesn't, then it will + // be moved left to the nearest MCU boundary, and width will be increased + // accordingly. Therefore, the final cinfo.crop_width might differ from the + // given flags.crop_width. Please see libjpeg library for details. + JDIMENSION crop_width = flags.crop_width; + JDIMENSION crop_x = flags.crop_x; + jpeg_crop_scanline(&cinfo, &crop_x, &crop_width); + + // Update cinfo.output_scanline. + skipped_scanlines = jpeg_skip_scanlines(&cinfo, flags.crop_y); + CHECK_EQ(skipped_scanlines, flags.crop_y); + } +#endif + // check for compatible stride - const int min_stride = cinfo.output_width * components * sizeof(JSAMPLE); + const int min_stride = target_output_width * components * sizeof(JSAMPLE); if (stride == 0) { stride = min_stride; } else if (stride < min_stride) { @@ -170,47 +216,88 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { } // Remember stride and height for use in Uncompress - argball->height_ = cinfo.output_height; + argball->height_ = target_output_height; argball->stride_ = stride; - uint8* const dstdata = argball->allocate_output_( - cinfo.output_width, cinfo.output_height, components); +#if defined(WIN32) + uint8* dstdata = nullptr; + if (flags.crop) { + dstdata = new JSAMPLE[stride * target_output_height]; + } else { + dstdata = argball->allocate_output_(target_output_width, + target_output_height, components); + } +#else + uint8* dstdata = argball->allocate_output_(target_output_width, + target_output_height, components); +#endif if (dstdata == nullptr) { jpeg_destroy_decompress(&cinfo); return nullptr; } JSAMPLE* output_line = static_cast(dstdata); - // Temporary buffer used for CMYK -> RGB conversion. + // jpeg_read_scanlines requires the buffers to be allocated based on + // cinfo.output_width, but the target image width might be different if crop + // is enabled and crop_width is not MCU aligned. In this case, we need to + // realign the scanline output to achieve the exact cropping. Notably, only + // cinfo.output_width needs to fall on MCU boundary, while cinfo.output_height + // has no such constraint. + const bool need_realign_cropped_scanline = + (target_output_width != cinfo.output_width); const bool use_cmyk = (cinfo.out_color_space == JCS_CMYK); - tempdata = use_cmyk ? new JSAMPLE[cinfo.output_width * 4] : nullptr; + + if (use_cmyk) { + // Temporary buffer used for CMYK -> RGB conversion. + tempdata = new JSAMPLE[cinfo.output_width * 4]; + } else if (need_realign_cropped_scanline) { + // Temporary buffer used for MCU-aligned scanline data. + tempdata = new JSAMPLE[cinfo.output_width * components]; + } // If there is an error reading a line, this aborts the reading. // Save the fraction of the image that has been read. - argball->height_read_ = cinfo.output_height; - while (cinfo.output_scanline < cinfo.output_height) { + argball->height_read_ = target_output_height; + + // These variables are just to avoid repeated computation in the loop. + const int max_scanlines_to_read = skipped_scanlines + target_output_height; + const int mcu_align_offset = + (cinfo.output_width - target_output_width) * (use_cmyk ? 4 : components); + while (cinfo.output_scanline < max_scanlines_to_read) { int num_lines_read = 0; - if (cinfo.out_color_space == JCS_CMYK) { + if (use_cmyk) { num_lines_read = jpeg_read_scanlines(&cinfo, &tempdata, 1); - // Convert CMYK to RGB - for (size_t i = 0; i < cinfo.output_width; ++i) { - int c = tempdata[4 * i + 0]; - int m = tempdata[4 * i + 1]; - int y = tempdata[4 * i + 2]; - int k = tempdata[4 * i + 3]; - int r, g, b; - if (cinfo.saw_Adobe_marker) { - r = (k * c) / 255; - g = (k * m) / 255; - b = (k * y) / 255; - } else { - r = (255 - k) * (255 - c) / 255; - g = (255 - k) * (255 - m) / 255; - b = (255 - k) * (255 - y) / 255; + if (num_lines_read > 0) { + // Convert CMYK to RGB if scanline read succeeded. + for (size_t i = 0; i < target_output_width; ++i) { + int offset = 4 * i; + if (need_realign_cropped_scanline) { + // Align the offset for MCU boundary. + offset += mcu_align_offset; + } + const int c = tempdata[offset + 0]; + const int m = tempdata[offset + 1]; + const int y = tempdata[offset + 2]; + const int k = tempdata[offset + 3]; + int r, g, b; + if (cinfo.saw_Adobe_marker) { + r = (k * c) / 255; + g = (k * m) / 255; + b = (k * y) / 255; + } else { + r = (255 - k) * (255 - c) / 255; + g = (255 - k) * (255 - m) / 255; + b = (255 - k) * (255 - y) / 255; + } + output_line[3 * i + 0] = r; + output_line[3 * i + 1] = g; + output_line[3 * i + 2] = b; } - output_line[3 * i + 0] = r; - output_line[3 * i + 1] = g; - output_line[3 * i + 2] = b; + } + } else if (need_realign_cropped_scanline) { + num_lines_read = jpeg_read_scanlines(&cinfo, &tempdata, 1); + if (num_lines_read > 0) { + memcpy(output_line, tempdata + mcu_align_offset, min_stride); } } else { num_lines_read = jpeg_read_scanlines(&cinfo, &output_line, 1); @@ -218,12 +305,13 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { // Handle error cases if (num_lines_read == 0) { LOG(ERROR) << "Premature end of JPEG data. Stopped at line " - << cinfo.output_scanline << "/" << cinfo.output_height; + << cinfo.output_scanline - skipped_scanlines << "/" + << target_output_height; if (!flags.try_recover_truncated_jpeg) { - argball->height_read_ = cinfo.output_scanline; + argball->height_read_ = cinfo.output_scanline - skipped_scanlines; error = JPEGERRORS_UNEXPECTED_END_OF_DATA; } else { - for (size_t line = cinfo.output_scanline; line < cinfo.output_height; + for (size_t line = cinfo.output_scanline; line < max_scanlines_to_read; ++line) { if (line == 0) { // If even the first line is missing, fill with black color @@ -235,9 +323,9 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { output_line += stride; } argball->height_read_ = - cinfo.output_height; // consider all lines as read + target_output_height; // consider all lines as read // prevent error-on-exit in libjpeg: - cinfo.output_scanline = cinfo.output_height; + cinfo.output_scanline = max_scanlines_to_read; } break; } @@ -248,23 +336,33 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { delete[] tempdata; tempdata = nullptr; +#if !defined(WIN32) + if (flags.crop && cinfo.output_scanline < cinfo.output_height) { + // Skip the rest of scanlines, required by jpeg_destroy_decompress. + jpeg_skip_scanlines(&cinfo, + cinfo.output_height - flags.crop_y - flags.crop_height); + // After this, cinfo.output_height must be equal to cinfo.output_height; + // otherwise, jpeg_destroy_decompress would fail. + } +#endif + // Convert the RGB data to RGBA, with alpha set to 0xFF to indicate // opacity. // RGBRGBRGB... --> RGBARGBARGBA... if (components == 4) { // Start on the last line. JSAMPLE* scanlineptr = static_cast( - dstdata + static_cast(cinfo.output_height - 1) * stride); + dstdata + static_cast(target_output_height - 1) * stride); const JSAMPLE kOpaque = -1; // All ones appropriate for JSAMPLE. - const int right_rgb = (cinfo.output_width - 1) * 3; - const int right_rgba = (cinfo.output_width - 1) * 4; + const int right_rgb = (target_output_width - 1) * 3; + const int right_rgba = (target_output_width - 1) * 4; - for (int y = cinfo.output_height; y-- > 0;) { + for (int y = target_output_height; y-- > 0;) { // We do all the transformations in place, going backwards for each row. const JSAMPLE* rgb_pixel = scanlineptr + right_rgb; JSAMPLE* rgba_pixel = scanlineptr + right_rgba; scanlineptr -= stride; - for (int x = cinfo.output_width; x-- > 0; + for (int x = target_output_width; x-- > 0; rgba_pixel -= 4, rgb_pixel -= 3) { // We copy the 3 bytes at rgb_pixel into the 4 bytes at rgba_pixel // The "a" channel is set to be opaque. @@ -319,8 +417,61 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { LOG(ERROR) << "Unhandled case " << error; break; } - jpeg_destroy_decompress(&cinfo); +#if defined(WIN32) + // TODO(tanmingxing): delete all these code after migrating to libjpeg_turbo + // for Windows. + if (flags.crop) { + // Update target output height and width based on crop window. + target_output_height = flags.crop_height; + target_output_width = flags.crop_width; + + // cinfo holds the original input image information. + if (!IsCropWindowValid(flags, cinfo.output_width, cinfo.output_height)) { + LOG(ERROR) << "Invalid crop window: x=" << flags.crop_x + << ", y=" << flags.crop_y << ", w=" << target_output_width + << ", h=" << target_output_height + << " for image_width: " << cinfo.output_width + << " and image_height: " << cinfo.output_height; + delete[] dstdata; + jpeg_destroy_decompress(&cinfo); + return nullptr; + } + + const uint8* full_image = dstdata; + dstdata = argball->allocate_output_(target_output_width, + target_output_height, components); + if (dstdata == nullptr) { + delete[] full_image; + jpeg_destroy_decompress(&cinfo); + return nullptr; + } + + const int full_image_stride = stride; + // Update stride and hight for crop window. + const int min_stride = target_output_width * components * sizeof(JSAMPLE); + if (flags.stride == 0) { + stride = min_stride; + } + argball->height_ = target_output_height; + argball->stride_ = stride; + + if (argball->height_read_ > target_output_height) { + argball->height_read_ = target_output_height; + } + const int crop_offset = flags.crop_x * components * sizeof(JSAMPLE); + const uint8* full_image_ptr = full_image + flags.crop_y * full_image_stride; + uint8* crop_image_ptr = dstdata; + for (int i = 0; i < argball->height_read_; i++) { + memcpy(crop_image_ptr, full_image_ptr + crop_offset, min_stride); + crop_image_ptr += stride; + full_image_ptr += full_image_stride; + } + delete[] full_image; + } +#endif + + jpeg_destroy_decompress(&cinfo); return dstdata; } diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.h b/tensorflow/core/lib/jpeg/jpeg_mem.h index ac34f29f22..59342d28c0 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem.h +++ b/tensorflow/core/lib/jpeg/jpeg_mem.h @@ -61,6 +61,17 @@ struct UncompressFlags { // // Setting this has a quality/speed trade-off implication. J_DCT_METHOD dct_method = JDCT_DEFAULT; + + // Settings of crop window before decompression. + bool crop = false; + // Vertical coordinate of the top-left corner of the result in the input. + int crop_x = 0; + // Horizontal coordinate of the top-left corner of the result in the input. + int crop_y = 0; + // Width of the output image. + int crop_width = 0; + // Height of the output image. + int crop_height = 0; }; // Uncompress some raw JPEG data given by the pointer srcdata and the length diff --git a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc index cc8646750e..15266af1db 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc @@ -57,7 +57,7 @@ void ReadFileToStringOrDie(Env* env, const string& filename, string* output) { void TestJPEG(Env* env, const string& jpegfile) { // Read the data from the jpeg file into memory string jpeg; - ReadFileToStringOrDie(Env::Default(), jpegfile, &jpeg); + ReadFileToStringOrDie(env, jpegfile, &jpeg); const int fsize = jpeg.size(); const uint8* const temp = bit_cast(jpeg.data()); @@ -95,6 +95,194 @@ TEST(JpegMemTest, Jpeg) { TestJPEG(env, data_path + "jpeg_merge_test1_cmyk.jpg"); } +void TestCropAndDecodeJpeg(Env* env, const string& jpegfile, + const UncompressFlags& default_flags) { + // Read the data from the jpeg file into memory + string jpeg; + ReadFileToStringOrDie(env, jpegfile, &jpeg); + const int fsize = jpeg.size(); + auto temp = bit_cast(jpeg.data()); + + // Decode the whole image. + std::unique_ptr imgdata1; + int w1, h1, c1; + { + UncompressFlags flags = default_flags; + if (flags.stride == 0) { + imgdata1.reset(Uncompress(temp, fsize, flags, &w1, &h1, &c1, nullptr)); + } else { + // If stride is not zero, the default allocator would fail because it + // allocate w*h*c bytes, but the actual required bytes should be stride*h. + // Therefore, we provide a specialized allocator here. + uint8* buffer = nullptr; + imgdata1.reset(Uncompress(temp, fsize, flags, nullptr, + [&](int width, int height, int components) { + w1 = width; + h1 = height; + c1 = components; + buffer = new uint8[flags.stride * height]; + return buffer; + })); + } + ASSERT_NE(imgdata1, nullptr); + } + + auto check_crop_and_decode_func = [&](int crop_x, int crop_y, int crop_width, + int crop_height) { + std::unique_ptr imgdata2; + int w, h, c; + UncompressFlags flags = default_flags; + flags.crop = true; + flags.crop_x = crop_x; + flags.crop_y = crop_y; + flags.crop_width = crop_width; + flags.crop_height = crop_height; + if (flags.stride == 0) { + imgdata2.reset(Uncompress(temp, fsize, flags, &w, &h, &c, nullptr)); + } else { + uint8* buffer = nullptr; + imgdata2.reset(Uncompress(temp, fsize, flags, nullptr, + [&](int width, int height, int components) { + w = width; + h = height; + c = components; + buffer = new uint8[flags.stride * height]; + return buffer; + })); + } + ASSERT_NE(imgdata2, nullptr); + + ASSERT_EQ(w, crop_width); + ASSERT_EQ(h, crop_height); + ASSERT_EQ(c, c1); + + const int stride1 = (flags.stride != 0) ? flags.stride : w1 * c; + const int stride2 = (flags.stride != 0) ? flags.stride : w * c; + for (int i = 0; i < crop_height; i++) { + const uint8* p1 = &imgdata1[(i + crop_y) * stride1 + crop_x * c]; + const uint8* p2 = &imgdata2[i * stride2]; + + for (int j = 0; j < c * w; j++) { + ASSERT_EQ(p1[j], p2[j]) + << "p1 != p2 in [" << i << "][" << j / 3 << "][" << j % 3 << "]"; + } + } + }; + + // Check different crop windows. + check_crop_and_decode_func(0, 0, 5, 5); + check_crop_and_decode_func(0, 0, w1, 5); + check_crop_and_decode_func(0, 0, 5, h1); + check_crop_and_decode_func(0, 0, w1, h1); + check_crop_and_decode_func(w1 - 5, h1 - 6, 5, 6); + check_crop_and_decode_func(5, 6, 10, 15); +} + +TEST(JpegMemTest, CropAndDecodeJpeg) { + Env* env = Env::Default(); + const string data_path = kTestData; + UncompressFlags flags; + + // Test basic flags for jpeg and cmyk jpeg. + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1_cmyk.jpg", flags); +} + +TEST(JpegMemTest, CropAndDecodeJpegWithRatio) { + Env* env = Env::Default(); + const string data_path = kTestData; + UncompressFlags flags; + for (int ratio : {1, 2, 4, 8}) { + flags.ratio = ratio; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + } +} + +TEST(JpegMemTest, CropAndDecodeJpegWithComponents) { + Env* env = Env::Default(); + const string data_path = kTestData; + UncompressFlags flags; + for (const int components : {0, 1, 3}) { + flags.components = components; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + } +} + +TEST(JpegMemTest, CropAndDecodeJpegWithUpScaling) { + Env* env = Env::Default(); + const string data_path = kTestData; + UncompressFlags flags; + flags.fancy_upscaling = true; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); +} + +TEST(JpegMemTest, CropAndDecodeJpegWithStride) { + Env* env = Env::Default(); + const string data_path = kTestData; + + // Read the data from the jpeg file into memory + string jpeg; + ReadFileToStringOrDie(env, data_path + "jpeg_merge_test1.jpg", &jpeg); + const int fsize = jpeg.size(); + auto temp = bit_cast(jpeg.data()); + + int w, h, c; + ASSERT_TRUE(GetImageInfo(temp, fsize, &w, &h, &c)); + + // stride must be either 0 or > w*c; otherwise, uncompress would fail. + UncompressFlags flags; + flags.stride = w * c; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + flags.stride = w * c * 3; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + flags.stride = w * c + 100; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); +} + +void CheckInvalidCropWindowFailed(const uint8* const temp, int fsize, int x, + int y, int w, int h) { + std::unique_ptr imgdata; + int ww, hh, cc; + UncompressFlags flags; + flags.components = 3; + flags.crop = true; + flags.crop_x = x; + flags.crop_y = y; + flags.crop_width = w; + flags.crop_height = h; + imgdata.reset(Uncompress(temp, fsize, flags, &ww, &hh, &cc, nullptr)); + CHECK(imgdata == nullptr); +} + +TEST(JpegMemTest, CropAndDecodeJpegWithInvalidCropWindow) { + Env* env = Env::Default(); + const string data_path = kTestData; + + // Read the data from the jpeg file into memory + string jpeg; + ReadFileToStringOrDie(env, data_path + "jpeg_merge_test1.jpg", &jpeg); + const int fsize = jpeg.size(); + auto temp = bit_cast(jpeg.data()); + + int w, h, c; + ASSERT_TRUE(GetImageInfo(temp, fsize, &w, &h, &c)); + + // Width and height for the crop window must be non zero. + CheckInvalidCropWindowFailed(temp, fsize, 11, 11, /*w=*/0, 11); + CheckInvalidCropWindowFailed(temp, fsize, 11, 11, 11, /*h=*/0); + + // Crop window must be non negative. + CheckInvalidCropWindowFailed(temp, fsize, /*x=*/-1, 11, 11, 11); + CheckInvalidCropWindowFailed(temp, fsize, 11, /*y=*/-1, 11, 11); + CheckInvalidCropWindowFailed(temp, fsize, 11, 11, /*w=*/-1, 11); + CheckInvalidCropWindowFailed(temp, fsize, 11, 11, 11, /*h=*/-1); + + // Invalid crop window width: x + crop_width = w + 1 > w + CheckInvalidCropWindowFailed(temp, fsize, /*x=*/w - 10, 11, 11, 11); + // Invalid crop window height: y + crop_height= h + 1 > h + CheckInvalidCropWindowFailed(temp, fsize, 11, /*y=*/h - 10, 11, 11); +} + TEST(JpegMemTest, Jpeg2) { // create known data, for size in_w x in_h const int in_w = 256; -- GitLab From 6af27b1e4906c60008c2202d2afb7f1ac209ae30 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Fri, 1 Sep 2017 15:29:54 -0700 Subject: [PATCH 175/294] Revert breaking change PiperOrigin-RevId: 167332804 --- tensorflow/core/kernels/variable_ops.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc index b14e555103..36b8ff09d7 100644 --- a/tensorflow/core/kernels/variable_ops.cc +++ b/tensorflow/core/kernels/variable_ops.cc @@ -83,7 +83,6 @@ TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL); IsVariableInitializedOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); -TF_CALL_bool(REGISTER_GPU_KERNELS) #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA -- GitLab From 42fcbb196052823c4393a4d5d8682ca425253f6a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 15:39:24 -0700 Subject: [PATCH 176/294] Automated g4 rollback of changelist 166396680 PiperOrigin-RevId: 167333886 --- tensorflow/contrib/cmake/tf_tests.cmake | 2 + tensorflow/core/kernels/BUILD | 9 +- tensorflow/core/kernels/l2loss_op.cc | 39 +- tensorflow/core/kernels/l2loss_op.h | 16 +- tensorflow/core/kernels/l2loss_op_gpu.cu.cc | 49 +- tensorflow/core/kernels/reduction_ops.h | 3 +- .../core/kernels/reduction_ops_common.h | 15 +- .../core/kernels/reduction_ops_gpu.cu.cc | 210 +++++- .../core/kernels/reduction_ops_gpu_kernels.h | 713 ++++++++++++++++++ tensorflow/core/kernels/reduction_ops_test.cc | 163 +++- .../core/util/permutation_input_iterator.h | 134 ++++ .../core/util/transform_output_iterator.h | 149 ++++ tensorflow/python/kernel_tests/BUILD | 20 + .../python/kernel_tests/cholesky_op_test.py | 13 +- .../python/kernel_tests/reduction_ops_test.py | 34 +- .../kernel_tests/reduction_ops_test_big.py | 179 +++++ 16 files changed, 1617 insertions(+), 131 deletions(-) create mode 100644 tensorflow/core/kernels/reduction_ops_gpu_kernels.h create mode 100644 tensorflow/core/util/permutation_input_iterator.h create mode 100644 tensorflow/core/util/transform_output_iterator.h create mode 100644 tensorflow/python/kernel_tests/reduction_ops_test_big.py diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 25f00de81d..6507a9a5e0 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -289,6 +289,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) # Failing with TF 1.3 (TODO) "${tensorflow_source_dir}/tensorflow/contrib/distributions/python/kernel_tests/estimator_test.py" "${tensorflow_source_dir}/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_test.py" + # Test should only be run manually + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/reduction_ops_test_big.py" ) endif() list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude}) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 6530ecf13f..cb7e3b3316 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2581,8 +2581,9 @@ tf_kernel_library( tf_kernel_library( name = "reduction_ops", + srcs = ["reduction_ops_gpu_kernels.h"], prefix = "reduction_ops", - deps = MATH_DEPS, + deps = MATH_DEPS + if_cuda(["@cub_archive//:cub"]), ) tf_kernel_library( @@ -3064,14 +3065,16 @@ tf_kernel_library( tf_kernel_library( name = "l2loss_op", prefix = "l2loss_op", + #srcs = ["reduction_ops_gpu_kernels.h"], deps = [ + ":reduction_ops", + "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_grad", "//tensorflow/core:nn_ops_op_lib", - "//third_party/eigen3", - ], + ] + if_cuda(["@cub_archive//:cub"]), ) tf_cuda_cc_test( diff --git a/tensorflow/core/kernels/l2loss_op.cc b/tensorflow/core/kernels/l2loss_op.cc index 9875cd027d..f8ed935157 100644 --- a/tensorflow/core/kernels/l2loss_op.cc +++ b/tensorflow/core/kernels/l2loss_op.cc @@ -27,10 +27,9 @@ limitations under the License. namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; -typedef Eigen::GpuDevice GPUDevice; -template -class L2LossOp : public OpKernel { +template +class L2LossOp : public OpKernel { public: explicit L2LossOp(OpKernelConstruction* context) : OpKernel(context) {} @@ -42,8 +41,9 @@ class L2LossOp : public OpKernel { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape({}), &output)); - functor::L2Loss()(context->eigen_device(), - input.flat(), output->scalar()); + const CPUDevice& d = context->eigen_device(); + output->scalar().device(d) = + (input.flat().square() * static_cast(0.5)).sum(); } }; @@ -57,33 +57,4 @@ REGISTER_KERNEL(double); REGISTER_KERNEL(Eigen::half); #undef REGISTER_KERNEL -#if GOOGLE_CUDA -// Forward declarations of the functor specializations for GPU. -namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void L2Loss::operator()(const GPUDevice& d, \ - typename TTypes::ConstTensor input, \ - typename TTypes::Scalar output); \ - extern template struct L2Loss; - -DECLARE_GPU_SPEC(float); -DECLARE_GPU_SPEC(double); -DECLARE_GPU_SPEC(Eigen::half); -#undef DECLARE_GPU_SPEC -} // namespace functor - -// Registration of the GPU implementations. -#define REGISTER_GPU_KERNEL(T) \ - REGISTER_KERNEL_BUILDER( \ - Name("L2Loss").Device(DEVICE_GPU).TypeConstraint("T"), \ - L2LossOp); - -REGISTER_GPU_KERNEL(float); -REGISTER_GPU_KERNEL(double); -REGISTER_GPU_KERNEL(Eigen::half); -#undef REGISTER_GPU_KERNEL - -#endif // GOOGLE_CUDA - } // namespace tensorflow diff --git a/tensorflow/core/kernels/l2loss_op.h b/tensorflow/core/kernels/l2loss_op.h index f7204cefdd..4953aa237c 100644 --- a/tensorflow/core/kernels/l2loss_op.h +++ b/tensorflow/core/kernels/l2loss_op.h @@ -15,25 +15,19 @@ limitations under the License. #ifndef TENSORFLOW_KERNELS_L2LOSS_OP_H_ #define TENSORFLOW_KERNELS_L2LOSS_OP_H_ -// Functor definition for L2LossOp, must be compilable by nvcc. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" namespace tensorflow { -namespace functor { -// Functor used by L2LossOp to do the computations. template -struct L2Loss { - void operator()(const Device& d, typename TTypes::ConstTensor input, - typename TTypes::Scalar output) { - // We flatten the input tensor and reduce on dimension 0, producing - // a single number which is Mul(Sum(x^2), 0.5). - output.device(d) = (input.square() * static_cast(0.5)).sum(); - } +struct L2LossOp : public OpKernel { + explicit L2LossOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) {} }; -} // namespace functor } // namespace tensorflow #endif // TENSORFLOW_KERNELS_L2LOSS_OP_H_ diff --git a/tensorflow/core/kernels/l2loss_op_gpu.cu.cc b/tensorflow/core/kernels/l2loss_op_gpu.cu.cc index 420df37086..73b6472254 100644 --- a/tensorflow/core/kernels/l2loss_op_gpu.cu.cc +++ b/tensorflow/core/kernels/l2loss_op_gpu.cu.cc @@ -21,12 +21,55 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/kernels/reduction_ops_common.h" +#include "tensorflow/core/kernels/reduction_ops_gpu_kernels.h" + namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; -template struct functor::L2Loss; -template struct functor::L2Loss; -template struct functor::L2Loss; + +// TODO(eriche): can add specialization for half2 +template +struct squareHalf { + __host__ __device__ T operator()(const T& x) const { + return static_cast(0.5) * x * x; + } +}; + +template +class L2LossOp : public OpKernel { + public: + explicit L2LossOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + // The input tensor can be of any number of dimensions, even though it's + // 2D in most typical applications. + const Tensor& input = context->input(0); + // The output is a single number. + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, TensorShape({}), &output)); + typedef cub::TransformInputIterator, T*> inputIterType; + inputIterType input_itr((T*)input.flat().data(), squareHalf()); + typedef const Eigen::array::Tensor::Index, 1>& ReductionAxes; + + Constants constants; + functor::ReduceImpl( + context, (T*)output->flat().data(), input_itr, 1, + input.flat().size(), 1, 1, 0, constants.kZero, cub::Sum(), T(0)); + } +}; + +// Registration of the GPU implementations. +#define REGISTER_GPU_KERNEL(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("L2Loss").Device(DEVICE_GPU).TypeConstraint("T"), \ + L2LossOp); + +REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(double); +REGISTER_GPU_KERNEL(Eigen::half); +#undef REGISTER_GPU_KERNEL } // namespace tensorflow diff --git a/tensorflow/core/kernels/reduction_ops.h b/tensorflow/core/kernels/reduction_ops.h index 5db9e6032e..e43d2828f3 100644 --- a/tensorflow/core/kernels/reduction_ops.h +++ b/tensorflow/core/kernels/reduction_ops.h @@ -20,6 +20,7 @@ limitations under the License. #include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" namespace tensorflow { @@ -67,7 +68,7 @@ void FillIdentityEigenImpl(const Device& d, OUT_T out, const Reducer& reducer) { template struct ReduceFunctor { template - static void Reduce(const Device& d, OUT_T out, IN_T in, + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, const ReductionAxes& reduction_axes, const Reducer& reducer); diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h index 553f889523..71af9d88dc 100644 --- a/tensorflow/core/kernels/reduction_ops_common.h +++ b/tensorflow/core/kernels/reduction_ops_common.h @@ -190,24 +190,24 @@ class ReductionOp : public OpKernel { Functor::FillIdentity(d, tmp_out.flat(), reducer); } else if ((helper.ndims() == 1) && helper.reduce_first_axis()) { // Reduce to a scalar. - Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kZero, reducer); } else if ((helper.ndims() == 2) && helper.reduce_first_axis()) { // Can be viewed as a reduction of a matrix along 1st dimension. - Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kZero, reducer); } else if ((helper.ndims() == 2) && !helper.reduce_first_axis()) { // Can be viewed as a reduction of a matrix along 2nd dimension. - Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kOne, reducer); } else if ((helper.ndims() == 3) && helper.reduce_first_axis()) { // Can be viewed as a reduction of a 3D tensor along 1st and 3rd // dimensions. - Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kZeroTwo, reducer); } else if ((helper.ndims() == 3) && !helper.reduce_first_axis()) { // Can be viewed as a reduction of a 3D tensor along 2nd dimension. - Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kOne, reducer); } else { // If we don't hit one of the cases above, transpose the data so that @@ -223,7 +223,7 @@ class ReductionOp : public OpKernel { const int64 unreduced = tmp_out.NumElements(); const int64 reduced = shuffled.NumElements() / unreduced; const Tensor& const_shuffled = shuffled; - Functor::Reduce(d, tmp_out.flat(), + Functor::Reduce(ctx, tmp_out.flat(), const_shuffled.shaped({unreduced, reduced}), constants.kOne, reducer); } @@ -258,9 +258,10 @@ namespace functor { template struct ReduceFunctorBase { template - static void Reduce(const Device& d, OUT_T out, IN_T in, + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, const ReductionAxes& reduction_axes, const Reducer& reducer) { + const Device& d = ctx->eigen_device(); ReduceEigenImpl(d, out, in, reduction_axes, reducer); } diff --git a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc index ec4490db83..8fd9165eb9 100644 --- a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc @@ -17,8 +17,7 @@ limitations under the License. #define EIGEN_USE_GPU -#include "tensorflow/core/framework/numeric_types.h" -#include "tensorflow/core/kernels/reduction_ops.h" +#include "tensorflow/core/kernels/reduction_ops_gpu_kernels.h" namespace tensorflow { namespace functor { @@ -33,15 +32,27 @@ typedef TTypes::Tensor::Index Index; template struct ReduceFunctor { template - static void Reduce(const GPUDevice& d, OUT_T out, IN_T in, + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, const ReductionAxes& reduction_axes, - const Reducer& reducer) { - ReduceEigenImpl(d, To32Bit(out), To32Bit(in), reduction_axes, reducer); + const Reducer& reducer); +}; + +template +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::SumReducer& reducer) { + ReduceImpl( + ctx, (T*)out.data(), (T*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, + cub::Sum(), T(0)); } template static void FillIdentity(const GPUDevice& d, OUT_T out, - const Reducer& reducer) { + const Eigen::internal::SumReducer& reducer) { FillIdentityEigenImpl(d, To32Bit(out), reducer); } }; @@ -49,19 +60,30 @@ struct ReduceFunctor { template struct ReduceFunctor> { template - static void Reduce(const GPUDevice& d, OUT_T out, IN_T in, + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, const ReductionAxes& reduction_axes, const Eigen::internal::MeanReducer& reducer) { - typedef typename IN_T::Index Index; - // Eigen sum reductions are much faster on GPU than mean reductions: - // Simply trigger them by computing the sum of the weighted inputs. - Index num_coeffs_to_reduce = 1; - for (int i = 0; i < Eigen::internal::array_size::value; - ++i) { - num_coeffs_to_reduce *= in.dimension(reduction_axes[i]); - } - T scale = T(1.0 / num_coeffs_to_reduce); - out.device(d) = (in * scale).sum(reduction_axes); + int divisor = 1; + if (out.rank() == 0) + divisor = in.size(); + else if (out.rank() == 1 && in.rank() == 2 && reduction_axes[0] == 0) + divisor = in.dimension(0); + else if (out.rank() == 1 && in.rank() == 2 && reduction_axes[0] == 1) + divisor = in.dimension(1); + else if (out.rank() == 1 && in.rank() == 3 && reduction_axes[0] == 0 && + reduction_axes[1] == 2) + divisor = in.dimension(0) * in.dimension(2); + else if (out.rank() == 2 && in.rank() == 3 && reduction_axes[0] == 1) + divisor = in.dimension(1); + + DividesBy div_op(static_cast(divisor)); + TransformOutputIterator> itr((T*)out.data(), div_op); + ReduceImpl>, T*, + ReductionAxes>(ctx, itr, (T*)in.data(), in.rank(), + in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), + reduction_axes, cub::Sum(), T(0)); } template @@ -71,15 +93,159 @@ struct ReduceFunctor> { } }; +template <> +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::MeanReducer& reducer) { + float divisor = 1.f; + if (out.rank() == 0) + divisor = in.size(); + else if (out.rank() == 1 && in.rank() == 2 && reduction_axes[0] == 0) + divisor = in.dimension(0); + else if (out.rank() == 1 && in.rank() == 2 && reduction_axes[0] == 1) + divisor = in.dimension(1); + else if (out.rank() == 1 && in.rank() == 3 && reduction_axes[0] == 0 && + reduction_axes[1] == 2) + divisor = in.dimension(0) * in.dimension(2); + else if (out.rank() == 2 && in.rank() == 3 && reduction_axes[0] == 1) + divisor = in.dimension(1); + DividesBy div_op(divisor); + + typedef cub::TransformInputIterator + inputIterType; + inputIterType input_itr((Eigen::half*)in.data(), HalfToFloat()); + + typedef TransformOutputIterator> + outputIterType; + outputIterType itr((Eigen::half*)out.data(), div_op); + + ReduceImpl( + ctx, itr, input_itr, in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, + cub::Sum(), 0.f); + } + + template + static void FillIdentity( + const GPUDevice& d, OUT_T out, + const Eigen::internal::MeanReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::MaxReducer& reducer) { + ReduceImpl( + ctx, (T*)out.data(), (T*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, + cub::Max(), std::numeric_limits::lowest()); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::MaxReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::MinReducer& reducer) { + ReduceImpl( + ctx, (T*)out.data(), (T*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, + cub::Min(), std::numeric_limits::max()); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::MinReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::ProdReducer& reducer) { + ReduceImpl, T*, T*, ReductionAxes>( + ctx, (T*)out.data(), (T*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, + Prod(), T(1)); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::ProdReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template <> +struct ReduceFunctor { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::AndReducer& reducer) { + ReduceImpl( + ctx, (bool*)out.data(), (bool*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, And(), + true); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::AndReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template <> +struct ReduceFunctor { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::OrReducer& reducer) { + ReduceImpl( + ctx, (bool*)out.data(), (bool*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, Or(), + false); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::OrReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + // T: the data type // REDUCER: the reducer functor // NUM_AXES: the number of axes to reduce // IN_DIMS: the number of dimensions of the input tensor -#define DEFINE(T, REDUCER, IN_DIMS, NUM_AXES) \ - template void ReduceFunctor::Reduce( \ - const GPUDevice& d, TTypes::Tensor out, \ - TTypes::ConstTensor in, \ - const Eigen::array& reduction_axes, \ +#define DEFINE(T, REDUCER, IN_DIMS, NUM_AXES) \ + template void ReduceFunctor::Reduce( \ + OpKernelContext* ctx, TTypes::Tensor out, \ + TTypes::ConstTensor in, \ + const Eigen::array& reduction_axes, \ const REDUCER& reducer); #define DEFINE_IDENTITY(T, REDUCER) \ diff --git a/tensorflow/core/kernels/reduction_ops_gpu_kernels.h b/tensorflow/core/kernels/reduction_ops_gpu_kernels.h new file mode 100644 index 0000000000..ce471c672c --- /dev/null +++ b/tensorflow/core/kernels/reduction_ops_gpu_kernels.h @@ -0,0 +1,713 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "external/cub_archive/cub/device/device_reduce.cuh" +#include "external/cub_archive/cub/device/device_segmented_reduce.cuh" +#include "external/cub_archive/cub/iterator/counting_input_iterator.cuh" +#include "external/cub_archive/cub/iterator/transform_input_iterator.cuh" +#include "external/cub_archive/cub/warp/warp_reduce.cuh" +#include "cuda/include/cuComplex.h" +#include "tensorflow/core/framework/numeric_types.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/reduction_ops.h" +#include "tensorflow/core/lib/core/bits.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" +#include "tensorflow/core/util/permutation_input_iterator.h" +#include "tensorflow/core/util/transform_output_iterator.h" + +#include + +namespace tensorflow { +namespace functor { + +typedef Eigen::GpuDevice GPUDevice; + +template +struct Prod { + __host__ __device__ T operator()(const T& a, const T& b) const { + return a * b; + } +}; + +// needed to work around a compiler bug in nvcc - it doesn't seem to like +// the overloaded multiply op for std::complex +template <> +struct Prod> { + __host__ __device__ std::complex operator()( + const std::complex& a, const std::complex& b) const { + auto result = cuCmulf(make_cuComplex(a.real(), a.imag()), + make_cuComplex(b.real(), b.imag())); + return std::complex(result.x, result.y); + } +}; + +template <> +struct Prod> { + __host__ __device__ std::complex operator()( + const std::complex& a, const std::complex& b) const { + auto result = cuCmul(make_cuDoubleComplex(a.real(), a.imag()), + make_cuDoubleComplex(b.real(), b.imag())); + return std::complex(result.x, result.y); + } +}; + +template +struct DividesBy { + T divisor; + + __host__ __device__ explicit DividesBy(T divisor) : divisor(divisor) {} + + __host__ __device__ outT operator()(const T& x) const { return x / divisor; } +}; + +// needed to work around a compiler bug in nvcc - it doesn't seem to like +// the overloaded ops for std::complex +template <> +struct DividesBy> { + cuFloatComplex divisor; + + __host__ __device__ explicit DividesBy(std::complex divisor) + : divisor(make_cuComplex(divisor.real(), divisor.imag())) {} + + // implements + __host__ __device__ std::complex operator()( + const std::complex& x) const { + auto result = cuCdivf(make_cuComplex(x.real(), x.imag()), divisor); + return std::complex(result.x, result.y); + } +}; + +template <> +struct DividesBy> { + cuDoubleComplex divisor; + + __host__ __device__ explicit DividesBy(std::complex divisor) + : divisor(make_cuDoubleComplex(divisor.real(), divisor.imag())) {} + + // implements + __host__ __device__ std::complex operator()( + const std::complex& x) const { + auto result = cuCdiv(make_cuDoubleComplex(x.real(), x.imag()), divisor); + return std::complex(result.x, result.y); + } +}; + +template <> +struct DividesBy { + float divisor; + + __host__ __device__ explicit DividesBy(float divisor) : divisor(divisor) {} + + __host__ __device__ Eigen::half operator()(const float& x) const { + return Eigen::half(x / divisor); + } +}; + +struct HalfToFloat { + __host__ __device__ float operator()(const Eigen::half& x) const { + return Eigen::half_impl::half_to_float(x); + } +}; + +struct FloatToHalf { + __host__ __device__ Eigen::half operator()(const float& x) const { + return Eigen::half_impl::float_to_half_rtne(x); + } +}; + +struct And { + __host__ __device__ bool operator()(const bool& a, const bool& b) const { + return a && b; + } +}; + +struct Or { + __host__ __device__ bool operator()(const bool& a, const bool& b) const { + return a || b; + } +}; + +// each block does a grid strided loop and reduces its values locally +// the case of one block is used for low latency small reductions to scalars +template +__global__ void BlockReduceKernel( + T in, outT out, int num_elems, Op op, + typename std::iterator_traits::value_type initVal) { + const int bid = blockIdx.x; + const int tid = threadIdx.x; + + const int gid = bid * blockDim.x + tid; + const int stride = blockDim.x * gridDim.x; + + typedef typename std::iterator_traits::value_type value_type; + + value_type sum = initVal; + if (gid < num_elems) { + sum = in[gid]; + for (int pos = gid + stride; pos < num_elems; pos += stride) { + sum = op(sum, in[pos]); + } + } + + typedef cub::BlockReduce BlockReduce; + + __shared__ typename BlockReduce::TempStorage temp_storage; + + // only include input values in the reduction + // + // elements: ----------------- + // grid: |====|====|====|====|====| + const int num_elements_to_reduce = + max(min(num_elems - bid * blockDim.x, num_threads), 0); + + sum = BlockReduce(temp_storage) + .template Reduce(sum, op, num_elements_to_reduce); + + if (tid == 0) out[bid] = sum; +} + +// maps a warp to each row +template +__global__ void RowReduceKernel( + T in, outT out, int num_rows, int num_cols, Op op, + typename std::iterator_traits::value_type initVal) { + typedef typename std::iterator_traits::value_type value_type; + const int row = (blockIdx.x * blockDim.x + threadIdx.x) / 32; + const int lane = threadIdx.x % 32; + + if (num_cols == 1) { + int gid = threadIdx.x + blockIdx.x * blockDim.x; + if (gid < num_rows) out[gid] = in[gid]; + return; + } + + value_type sum = initVal; + int col = lane; + + if (row < num_rows && col < num_cols) { + sum = in[row * num_cols + col]; + col += 32; + for (; col < num_cols; col += 32) { + sum = op(sum, in[row * num_cols + col]); + } + } + + typedef cub::WarpReduce WarpReduce; + + __shared__ typename WarpReduce::TempStorage temp_storage; + + sum = WarpReduce(temp_storage).template Reduce(sum, op, min(num_cols, 32)); + + if (row < num_rows && lane == 0) out[row] = sum; +} + +// Works only if there are <= 16 columns +// each warps sums over multiple rows at once +template +__global__ void ColumnReduceMax16ColumnsKernel( + T in, outT out, int num_rows, int num_cols, Op op, + typename std::iterator_traits::value_type initVal) { + typedef typename std::iterator_traits::value_type value_type; + int rows_per_warp = 32 / num_cols; + + const int lane = threadIdx.x % 32; + const int lane_row = lane / num_cols; + + const int start_row_warp = + rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y); + const int start_row_lane = start_row_warp + lane_row; + int row = start_row_lane; + int col = lane % num_cols; + + value_type sum = initVal; + if (row * num_cols + col < num_rows * num_cols) + sum = in[row * num_cols + col]; + + __shared__ value_type partial_sums[32][33]; + + row += rows_per_warp * gridDim.y * blockDim.y; + for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) { + int global_pos = row * num_cols + col; + if (global_pos < (num_rows * num_cols)) + sum = op(sum, in[row * num_cols + col]); + } + + const int rows_in_this_warp = min(rows_per_warp, num_rows - start_row_warp); + // not the most efficient way to do this sum + for (int i = 1; i < rows_in_this_warp; ++i) { + value_type tmp = + cub::ShuffleIndex(sum, threadIdx.x + i * num_cols, 32, 0xffffffff); + if (lane < num_cols) sum = op(sum, tmp); + } + + if (lane < num_cols) partial_sums[lane][threadIdx.y] = sum; + + __syncthreads(); + + if (threadIdx.y == 0 && threadIdx.x < num_cols) { + value_type s = partial_sums[threadIdx.x][0]; + + if (blockDim.y > 1) { + for (int row = 1; row < blockDim.y; ++row) { + s = op(s, partial_sums[threadIdx.x][row]); + } + } + + out[col * gridDim.y + blockIdx.y] = s; + } +} + +// Maps each block to a column range 32 wide +template +__global__ void ColumnReduceKernel( + T in, outT out, int num_rows, int num_cols, Op op, + typename std::iterator_traits::value_type initVal) { + typedef typename std::iterator_traits::value_type value_type; + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * 32 + threadIdx.x; + + value_type sum = initVal; + if (row < num_rows && col < num_cols) + sum = in[row * num_cols + col]; + + __shared__ value_type partial_sums[32][33]; + + row += gridDim.y * blockDim.y; + + if (col < num_cols) { + for (; row < num_rows; row += gridDim.y * blockDim.y) { + sum = op(sum, in[row * num_cols + col]); + } + } + + partial_sums[threadIdx.x][threadIdx.y] = sum; + + __syncthreads(); + + if (threadIdx.y == 0 && col < num_cols) { + value_type s = partial_sums[threadIdx.x][0]; + + // only include input values in the reduction + // elem block_rows + // - = + // - = + // # # block boundary + // - = + // - = + // # # block boundary + // - = + // = + const int numRowsThisBlock = + min(blockDim.y, num_rows - blockIdx.y * blockDim.y); + + for (int row = 1; row < numRowsThisBlock; ++row) { + s = op(s, partial_sums[threadIdx.x][row]); + } + + out[col * gridDim.y + blockIdx.y] = s; + } +} + +// does multiple warp size segmented reductions in parallel +// segments cannot cross warp boundaries (mainly used for reducing the segments +// that come from the Max16Columns column reduction kernel) +template +__global__ void CleanupSegments( + T partial_sums, outT out, int num_rows, int num_cols, int segment_size, + Op op, typename std::iterator_traits::value_type initVal) { + typedef typename std::iterator_traits::value_type value_type; + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + + value_type val = initVal; + if (tid < segment_size * num_cols) + val = partial_sums[tid]; + + typedef cub::WarpReduce WarpReduce; + + __shared__ typename WarpReduce::TempStorage temp_storage; + + const bool head_flag = (threadIdx.x % segment_size) == 0; + value_type sum = + WarpReduce(temp_storage).HeadSegmentedReduce(val, head_flag, op); + + if (head_flag && tid < segment_size * num_cols) { + out[tid / segment_size] = sum; + } +} + +// assigns one thread to a column +template +__global__ void ColumnReduceSimpleKernel(T in, outT out, int num_planes, + int num_rows, int num_cols, Op op) { + typedef typename std::iterator_traits::value_type value_type; + const int gid = threadIdx.x + blockIdx.x * blockDim.x; + const int elems_per_plane = num_rows * num_cols; + + const int plane = gid / num_cols; + const int col = gid % num_cols; + + if (plane >= num_planes) return; + + if (num_rows == 1) { + out[plane * elems_per_plane + col] = in[plane * elems_per_plane + col]; + return; + } + + value_type sum = op(in[plane * elems_per_plane + col], + in[plane * elems_per_plane + num_cols + col]); + for (int row = 2; row < num_rows; ++row) { + sum = op(sum, in[plane * elems_per_plane + row * num_cols + col]); + } + + out[plane * num_cols + col] = sum; +} + +struct RowOffset { + __host__ __device__ explicit RowOffset(const int& cols) : cols_(cols) {} + + __host__ __device__ int operator()(const int& x) const { return cols_ * x; } + + int cols_; +}; + +struct GatherOp { + __host__ __device__ GatherOp(const int& extent_x, const int& extent_y, + const int& extent_z, bool kOne) + : extent_x_(extent_x), + extent_y_(extent_y), + extent_z_(extent_z), + kOne_(kOne) { + if (kOne_) + group_size_ = extent_y_; + else + group_size_ = extent_x_ * extent_z_; + } + + __host__ __device__ int operator()(const int& ind) const { + const int group = kOne_ ? ind / group_size_ : ind % group_size_; + const int offset = kOne_ ? ind % group_size_ : ind / group_size_; + + const int x = group / extent_z_; + const int z = group % extent_z_; + + return x * extent_y_ * extent_z_ + z + offset * extent_z_; + } + + int extent_x_; + int extent_y_; + int extent_z_; + bool kOne_; + int group_size_; +}; + +template +void LaunchScalarReduction(OpKernelContext* ctx, OUT_T out, IN_T in, + int in_size, Op op, T init, + const cudaStream_t& cu_stream) { + // handle situations where low latency is important better than CUB + if (in_size <= 4096) { + const int num_blocks = 1; + const int num_threads = 256; + BlockReduceKernel + <<>>(in, out, in_size, op, init); + return; + } else if (in_size <= 1 << 19) { + const int num_threads = 256; + const int num_blocks = min(32, Eigen::divup(in_size, num_threads)); + // it seems like tailoring this to the GPU + // would be more effective, but all attempts + // at making this a multiple of the number of + // multiprocessors have lead to lower perf + // in general + // TODO(eriche) investigate this more + + Tensor temp_storage; + OP_REQUIRES_OK( + ctx, + ctx->allocate_temp( + DT_INT8, TensorShape({static_cast(num_blocks * sizeof(T))}), + &temp_storage)); + + BlockReduceKernel + <<>>( + in, (T*)temp_storage.flat().data(), in_size, op, init); + + // take care that we only reduce blocks that had some valid elements in them + // TODO(eriche): CUB currently has a bug in HeadSegmentedReduce that + // requires it to be used with a full warp. Can reduce 32 -> num_blocks + // when this is fixed. + CleanupSegments<<<1, 32, 0, cu_stream>>>( + (T*)temp_storage.flat().data(), out, 1, 1, num_blocks, op, + init); + return; + } + std::size_t temp_storage_bytes = 0; + + Tensor temp_storage; + // written as a loop because it reduces clutter + // first pass allocates memory, second launches kernel(s) + for (int i = 0; i < 2; ++i) { + auto success = cub::DeviceReduce::Reduce( + i == 0 ? nullptr : temp_storage.flat().data(), + temp_storage_bytes, in, out, in_size, op, init, cu_stream); + + OP_REQUIRES( + ctx, success == 0, + errors::Internal("CUB reduce error", cudaGetErrorString(success))); + + if (i == 0) + OP_REQUIRES_OK( + ctx, + ctx->allocate_temp( + DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), + &temp_storage)); + } +} + +template +void LaunchRowReduction(OpKernelContext* ctx, OUT_T out, IN_T in, int num_rows, + int num_cols, Op op, T init, + const cudaStream_t& cu_stream) { + if (num_cols < 1024) { + const int threads_per_block = 128; + const int warps_per_block = threads_per_block / 32; + int num_blocks = (num_rows + warps_per_block - 1) / warps_per_block; + + RowReduceKernel<<>>( + in, out, num_rows, num_cols, op, init); + return; + } + + // setup segment offsets with counting and transform iterator + RowOffset row_offset_op(num_cols); + cub::CountingInputIterator counting_iter(0); + cub::TransformInputIterator> + transform_iter(counting_iter, row_offset_op); + + std::size_t temp_storage_bytes = 0; + Tensor temp_storage; + for (int i = 0; i < 2; ++i) { + auto success = cub::DeviceSegmentedReduce::Reduce( + i == 0 ? nullptr : temp_storage.flat().data(), + temp_storage_bytes, in, out, num_rows, transform_iter, + transform_iter + 1, op, init, cu_stream); + + OP_REQUIRES(ctx, success == 0, + errors::Internal("CUB segmented reduce error", + cudaGetErrorString(success))); + + if (i == 0) + OP_REQUIRES_OK( + ctx, + ctx->allocate_temp( + DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), + &temp_storage)); + } +} + +template +void LaunchColumnReduction_LTE16Cols(OpKernelContext* ctx, OUT_T out, IN_T in, + int extent_x, int extent_y, Op op, T init, + const cudaStream_t& cu_stream) { + int rows_per_warp = 32 / extent_y; + dim3 block_dim(32, min(Eigen::divup(extent_x, rows_per_warp), 32), 1); + dim3 grid_dim(1, + Eigen::divup(static_cast(extent_x), + rows_per_warp * block_dim.y), + 1); + + grid_dim.y = min((int)grid_dim.y, 32); + + if (grid_dim.y > 2 && grid_dim.y < 32) { + int log2 = Log2Floor(grid_dim.y); + grid_dim.y = 1 << log2; + } + + if (grid_dim.y == 1) { + ColumnReduceMax16ColumnsKernel<<>>( + in, out, extent_x, extent_y, op, init); + } else { + Tensor temp_storage; + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DT_INT8, + TensorShape({static_cast( + sizeof(T) * extent_y * grid_dim.y)}), + &temp_storage)); + ColumnReduceMax16ColumnsKernel<<>>( + in, (T*)temp_storage.flat().data(), extent_x, extent_y, op, + init); + + dim3 new_grid_dim((grid_dim.y * extent_y + 31) / 32, 1, 1); + dim3 num_threads(128, 1, 1); + CleanupSegments<<>>( + (T*)temp_storage.flat().data(), out, extent_x, extent_y, + grid_dim.y, op, init); + } +} + +template +void LaunchColumnReduction_LTE4096Cols(OpKernelContext* ctx, OUT_T out, IN_T in, + int extent_x, int extent_y, Op op, + T init, const cudaStream_t& cu_stream) { + dim3 block_dim(32, min(extent_x, 32), 1); + dim3 grid_dim((extent_y + 31) / 32, 1, 1); + + if (grid_dim.x < 16) grid_dim.y = min((extent_x + 31) / 32, 32); + + if (grid_dim.y > 2 && grid_dim.y < 32) { + int log2 = Log2Floor(grid_dim.y); + grid_dim.y = 1 << log2; + } + + if (grid_dim.y == 1) { + ColumnReduceKernel<<>>( + in, out, extent_x, extent_y, op, init); + } else { + Tensor temp_storage; + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DT_INT8, + TensorShape({static_cast( + sizeof(T) * extent_y * grid_dim.y)}), + &temp_storage)); + + ColumnReduceKernel<<>>( + in, (T*)temp_storage.flat().data(), extent_x, extent_y, op, + init); + + dim3 new_grid_dim((grid_dim.y * extent_y + 31) / 32, 1, 1); + dim3 num_threads(128, 1, 1); + CleanupSegments<<>>( + (T*)temp_storage.flat().data(), out, extent_x, extent_y, + grid_dim.y, op, init); + } +} + +template +void LaunchColumnReduction(OpKernelContext* ctx, OUT_T out, IN_T in, + int extent_x, int extent_y, Op op, T init, + const cudaStream_t& cu_stream) { + if (extent_y <= 16) { + LaunchColumnReduction_LTE16Cols(ctx, out, in, extent_x, extent_y, op, init, + cu_stream); + } else if (extent_y <= 4096) { + LaunchColumnReduction_LTE4096Cols(ctx, out, in, extent_x, extent_y, op, + init, cu_stream); + } else { + int threads_per_block = 128; + int num_blocks = Eigen::divup(extent_y, threads_per_block); + + ColumnReduceSimpleKernel<<>>( + in, out, 1, extent_x, extent_y, op); + } +} + +template +void Launch3DYReduction(OpKernelContext* ctx, OUT_T out, IN_T in, int extent_x, + int extent_y, int extent_z, Op op, T init, + const cudaStream_t& cu_stream) { + int threads_per_block = 128; + int num_blocks = + (extent_x * extent_z + threads_per_block - 1) / threads_per_block; + + // TODO(eriche): this won't be very good in the case of small x + // small z and large y. + ColumnReduceSimpleKernel<<>>( + in, out, extent_x, extent_y, extent_z, op); +} + +template +void Launch3DXZReduction(OpKernelContext* ctx, OUT_T out, IN_T in, int extent_x, + int extent_y, int extent_z, Op op, T init, + const cudaStream_t& cu_stream) { + // setup segment offsets with counting and transform iterator + RowOffset row_offset_op(extent_x * extent_z); + cub::CountingInputIterator counting_iter(0); + cub::TransformInputIterator> + transform_iter(counting_iter, row_offset_op); + + GatherOp gather_op(extent_x, extent_y, extent_z, false); + typedef cub::TransformInputIterator> + gatherIterType; + gatherIterType gather_iter(counting_iter, gather_op); + + PermutationInputIterator permute_iter(in, + gather_iter); + + std::size_t temp_storage_bytes = 0; + Tensor temp_storage; + + for (int i = 0; i < 2; ++i) { + auto success = cub::DeviceSegmentedReduce::Reduce( + i == 0 ? nullptr : temp_storage.flat().data(), + temp_storage_bytes, permute_iter, out, extent_y, transform_iter, + transform_iter + 1, op, init, cu_stream); + + OP_REQUIRES(ctx, success == 0, + errors::Internal("CUB segmented reduce error", + cudaGetErrorString(success))); + + if (i == 0) + OP_REQUIRES_OK( + ctx, + ctx->allocate_temp( + DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), + &temp_storage)); + } +} + +template +void ReduceImpl(OpKernelContext* ctx, OUT_T out, IN_T in, int in_rank, + int in_dim0, int in_dim1, int in_dim2, int out_rank, + const ReductionAxes& reduction_axes, Op op, T init) { + const cudaStream_t& cu_stream = GetCudaStream(ctx); + if (out_rank == 0) { + const int in_size = in_dim0 * in_dim1 * in_dim2; + LaunchScalarReduction(ctx, out, in, in_size, op, init, cu_stream); + } else if (in_rank == 2 && out_rank == 1 && + reduction_axes[0] == 1) { // row reduction + LaunchRowReduction(ctx, out, in, in_dim0, in_dim1, op, init, cu_stream); + } else if (in_rank == 2 && out_rank == 1 && + reduction_axes[0] == 0) { // column reduction + LaunchColumnReduction(ctx, out, in, in_dim0, in_dim1, op, init, cu_stream); + } else if (in_rank == 3 && out_rank == 2 && reduction_axes[0] == 1) { + Launch3DYReduction(ctx, out, in, in_dim0, in_dim1, in_dim2, op, init, + cu_stream); + } else if (in_rank == 3 && out_rank == 1 && reduction_axes[0] == 0 && + reduction_axes[1] == 2) { + Launch3DXZReduction(ctx, out, in, in_dim0, in_dim1, in_dim2, op, init, + cu_stream); + } else { + std::stringstream ss; + ss << "Invalid reduction requested: in_rank, out_rank, axes " << in_rank + << " " << out_rank; + if (out_rank == 1) ss << " " << reduction_axes[0]; + if (out_rank == 2) ss << " " << reduction_axes[1]; + LOG(FATAL) << ss.str(); + } +} + +} // namespace functor +} // namespace tensorflow + +#endif diff --git a/tensorflow/core/kernels/reduction_ops_test.cc b/tensorflow/core/kernels/reduction_ops_test.cc index 9cdebdd4f2..9bbe993a2f 100644 --- a/tensorflow/core/kernels/reduction_ops_test.cc +++ b/tensorflow/core/kernels/reduction_ops_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -22,14 +23,59 @@ namespace tensorflow { // Creates a Graph which "reduce"s a 3D float tensor of "num" elements // into a scalar. -static Graph* ToScalar(const string& reduce, int num) { - Graph* g = new Graph(OpRegistry::Global()); - Tensor data(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)})); - data.flat().setRandom(); - Tensor axes(DT_INT32, TensorShape({3})); +template +static Graph* ToScalar(const string& reduce, int num_x, int num_y) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DataTypeToEnum::value, TensorShape({num_x, num_y})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({2})); axes.flat()(0) = 0; axes.flat()(1) = 1; - axes.flat()(2) = 2; + test::graph::Reduce(g, reduce, test::graph::Constant(g, data), + test::graph::Constant(g, axes)); + return g; +} + +static Graph* ColReduce(const string& reduce, int num_x, int num_y) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DT_FLOAT, TensorShape({num_x, num_y})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({1})); + axes.flat()(0) = 0; + test::graph::Reduce(g, reduce, test::graph::Constant(g, data), + test::graph::Constant(g, axes)); + return g; +} + +static Graph* RowReduce(const string& reduce, int num_x, int num_y) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DT_FLOAT, TensorShape({num_x, num_y})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({1})); + axes.flat()(0) = 1; + test::graph::Reduce(g, reduce, test::graph::Constant(g, data), + test::graph::Constant(g, axes)); + return g; +} + +static Graph* ThreeDYReduce(const string& reduce, int num_y, int num_z) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({1})); + axes.flat()(0) = 1; + test::graph::Reduce(g, reduce, test::graph::Constant(g, data), + test::graph::Constant(g, axes)); + return g; +} + +static Graph* ThreeDXZReduce(const string& reduce, int num_y, int num_z) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({2})); + axes.flat()(0) = 0; + axes.flat()(1) = 2; test::graph::Reduce(g, reduce, test::graph::Constant(g, data), test::graph::Constant(g, axes)); return g; @@ -37,51 +83,100 @@ static Graph* ToScalar(const string& reduce, int num) { // Creates a bench which reduces a 3D tensor with total "num" floats // into a scalar on a "device". Runs the bench for "iters" times. +template static void ReduceToScalar(int iters, const string& device, - const string& reduce, int num) { - testing::ItemsProcessed(static_cast(iters) * num); - testing::BytesProcessed(static_cast(iters) * num * sizeof(float)); - test::Benchmark(device, ToScalar(reduce, num)).Run(iters); + const string& reduce, int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(T)); + test::Benchmark(device, ToScalar(reduce, num_x, num_y)).Run(iters); +} + +static void DoRowReduce(int iters, const string& device, const string& reduce, + int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(float)); + test::Benchmark(device, RowReduce(reduce, num_x, num_y)).Run(iters); +} + +static void DoColReduce(int iters, const string& device, const string& reduce, + int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(float)); + test::Benchmark(device, ColReduce(reduce, num_x, num_y)).Run(iters); +} + +static void Do3DYReduce(int iters, const string& device, const string& reduce, + int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(float)); + test::Benchmark(device, ThreeDYReduce(reduce, num_x, num_y)).Run(iters); +} + +static void Do3DXZReduce(int iters, const string& device, const string& reduce, + int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(float)); + test::Benchmark(device, ThreeDXZReduce(reduce, num_x, num_y)).Run(iters); +} + +static void BM_Sum2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Sum", num_x, num_y); +} +BENCHMARK(BM_Sum2DToScalarGPU)->RangePair(1, 8192, 1, 8192); + +static void BM_Sum2DToScalarGPUComplex(int iters, int num_x, int num_y) { + ReduceToScalar>(iters, "gpu", "Sum", num_x, num_y); +} +BENCHMARK(BM_Sum2DToScalarGPUComplex)->RangePair(1, 8192, 1, 8192); + +static void BM_Sum2DToScalarGPUHalf(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Sum", num_x, num_y); } +BENCHMARK(BM_Sum2DToScalarGPUHalf)->RangePair(1, 8192, 1, 8192); -static void BM_Sum3DToScalarCPU(int iters, int num) { - ReduceToScalar(iters, "cpu", "Sum", num); +static void BM_Sum2DRowReduceGPU(int iters, int num_x, int num_y) { + DoRowReduce(iters, "gpu", "Sum", num_x, num_y); } -BENCHMARK(BM_Sum3DToScalarCPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Sum2DRowReduceGPU)->RangePair(1, 8192, 1, 8192); -static void BM_Max3DToScalarCPU(int iters, int num) { - ReduceToScalar(iters, "cpu", "Max", num); +static void BM_Sum2DColumnReduceGPU(int iters, int num_x, int num_y) { + DoColReduce(iters, "gpu", "Sum", num_x, num_y); } -BENCHMARK(BM_Max3DToScalarCPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Sum2DColumnReduceGPU)->RangePair(1, 8192, 1, 8192); -static void BM_Prod3DToScalarCPU(int iters, int num) { - ReduceToScalar(iters, "cpu", "Prod", num); +static void BM_Sum3DYReduceGPU(int iters, int num_x, int num_y) { + Do3DYReduce(iters, "gpu", "Sum", num_x, num_y); } -BENCHMARK(BM_Prod3DToScalarCPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Sum3DYReduceGPU)->RangePair(64, 4096, 64, 4096); -static void BM_Mean3DToScalarCPU(int iters, int num) { - ReduceToScalar(iters, "cpu", "Mean", num); +static void BM_Sum3DXZReduceGPU(int iters, int num_x, int num_y) { + Do3DXZReduce(iters, "gpu", "Sum", num_x, num_y); } -BENCHMARK(BM_Mean3DToScalarCPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Sum3DXZReduceGPU)->RangePair(64, 4096, 64, 4096); -static void BM_Sum3DToScalarGPU(int iters, int num) { - ReduceToScalar(iters, "gpu", "Sum", num); +static void BM_Mean2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Mean", num_x, num_y); } -BENCHMARK(BM_Sum3DToScalarGPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Mean2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); -static void BM_Max3DToScalarGPU(int iters, int num) { - ReduceToScalar(iters, "gpu", "Max", num); +static void BM_Max2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Max", num_x, num_y); } -BENCHMARK(BM_Max3DToScalarGPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Max2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); -static void BM_Prod3DToScalarGPU(int iters, int num) { - ReduceToScalar(iters, "gpu", "Prod", num); +static void BM_Min2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Min", num_x, num_y); } -BENCHMARK(BM_Prod3DToScalarGPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Min2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); -static void BM_Mean3DToScalarGPU(int iters, int num) { - ReduceToScalar(iters, "gpu", "Mean", num); +static void BM_Bool2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "All", num_x, num_y); } -BENCHMARK(BM_Mean3DToScalarGPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Bool2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); } // end namespace tensorflow diff --git a/tensorflow/core/util/permutation_input_iterator.h b/tensorflow/core/util/permutation_input_iterator.h new file mode 100644 index 0000000000..f6375b2515 --- /dev/null +++ b/tensorflow/core/util/permutation_input_iterator.h @@ -0,0 +1,134 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_UTIL_PERMUTATION_INPUT_ITERATOR_H_ +#define TENSORFLOW_UTIL_PERMUTATION_INPUT_ITERATOR_H_ + +#include +#include + +namespace tensorflow { + +template +class PermutationInputIterator { + public: + // Required iterator traits + typedef PermutationInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of + ///< subtracting one iterator from another + typedef ValueType + value_type; ///< The type of the element the iterator can point to + typedef ValueType* pointer; ///< The type of a pointer to an element the + ///< iterator can point to + typedef ValueType reference; ///< The type of a reference to an element the + ///< iterator can point to + + typedef std::random_access_iterator_tag + iterator_category; ///< The iterator category + + private: + InputIteratorT input_itr; + IndexIteratorT index_itr; + + public: + /// Constructor + __host__ __device__ __forceinline__ PermutationInputIterator( + InputIteratorT input_itr, ///< Input iterator to wrap + IndexIteratorT index_itr) ///< Conversion functor to wrap + : input_itr(input_itr), index_itr(index_itr) {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) { + self_type retval = *this; + index_itr++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() { + index_itr++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const { + return input_itr[*index_itr]; + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const { + self_type retval(input_itr, index_itr + n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { + index_itr += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const { + self_type retval(input_itr, index_itr - n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { + index_itr -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type + operator-(self_type other) const { + return index_itr - other.index_itr; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const { + return input_itr[index_itr[n]]; + } + + /// Structure dereference + __host__ __device__ __forceinline__ pointer operator->() { + return input_itr + *index_itr; + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { + return (index_itr == rhs.index_itr && input_itr == rhs.input_itr); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { + return !(*this == rhs); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { + return os; + } +}; + +} // end namespace tensorflow + +#endif // TENSORFLOW_UTIL_PERMUTATION_INPUT_ITERATOR_H_ diff --git a/tensorflow/core/util/transform_output_iterator.h b/tensorflow/core/util/transform_output_iterator.h new file mode 100644 index 0000000000..1640791ad1 --- /dev/null +++ b/tensorflow/core/util/transform_output_iterator.h @@ -0,0 +1,149 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_UTIL_TRANSFORM_OUTPUT_ITERATOR_H_ +#define TENSORFLOW_UTIL_TRANSFORM_OUTPUT_ITERATOR_H_ + +#include +#include + +namespace tensorflow { + +template +class TransformOutputIterator { + private: + // Proxy object + struct Reference { + StoreType* ptr; + ConversionOp conversion_op; + + /// Constructor + __host__ __device__ __forceinline__ Reference(StoreType* ptr, + ConversionOp conversion_op) + : ptr(ptr), conversion_op(conversion_op) {} + + /// Assignment + __host__ __device__ __forceinline__ InputType operator=(InputType val) { + *ptr = conversion_op(val); + return val; + } + }; + + public: + // Required iterator traits + typedef TransformOutputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of + ///< subtracting one iterator from another + typedef void + value_type; ///< The type of the element the iterator can point to + typedef void pointer; ///< The type of a pointer to an element the iterator + ///< can point to + typedef Reference reference; ///< The type of a reference to an element the + ///< iterator can point to + + typedef std::random_access_iterator_tag + iterator_category; ///< The iterator category + + /*private:*/ + + StoreType* ptr; + ConversionOp conversion_op; + + public: + /// Constructor + template + __host__ __device__ __forceinline__ TransformOutputIterator( + QualifiedStoreType* ptr, + ConversionOp conversionOp) ///< Native pointer to wrap + : ptr(ptr), conversion_op(conversionOp) {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) { + self_type retval = *this; + ptr++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() { + ptr++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const { + return Reference(ptr, conversion_op); + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const { + self_type retval(ptr + n, conversion_op); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { + ptr += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const { + self_type retval(ptr - n, conversion_op); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { + ptr -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type + operator-(self_type other) const { + return ptr - other.ptr; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const { + return Reference(ptr + n, conversion_op); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { + return (ptr == rhs.ptr); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { + return (ptr != rhs.ptr); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { + return os; + } +}; + +} // end namespace tensorflow + +#endif // TENSORFLOW_UTIL_TRANSFORM_OUTPUT_ITERATOR_H_ diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index ed3f02aedd..ed89a6dc48 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1708,6 +1708,26 @@ cuda_py_test( tags = ["no_windows_gpu"], ) +cuda_py_test( + name = "reduction_ops_test_big", + size = "medium", + srcs = ["reduction_ops_test_big.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + ], + tags = [ + "manual", + "no_gpu", + "nogpu", + "noguitar", + "notap", + ], +) + cuda_py_test( name = "relu_op_test", size = "small", diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py index eb06e067a7..de80fb3055 100644 --- a/tensorflow/python/kernel_tests/cholesky_op_test.py +++ b/tensorflow/python/kernel_tests/cholesky_op_test.py @@ -183,14 +183,11 @@ class CholeskyGradTest(test.TestCase): self.runFiniteDifferences( shapes, dtypes=(dtypes_lib.float32, dtypes_lib.float64)) - # TODO(eriche): investigate why this test fails only in opensource - # ubuntu gpu python3 - - # def testSmallMatricesComplex(self): - # np.random.seed(0) - # shapes = self.getShapes([1, 2, 10]) - # self.runFiniteDifferences( - # shapes, dtypes=(dtypes_lib.complex64, dtypes_lib.complex128)) + def testSmallMatricesComplex(self): + np.random.seed(0) + shapes = self.getShapes([1, 2, 10]) + self.runFiniteDifferences( + shapes, dtypes=(dtypes_lib.complex64, dtypes_lib.complex128)) def testOneBlockMatrices(self): np.random.seed(0) diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py index 04ce99a4a6..8d6b7925e4 100644 --- a/tensorflow/python/kernel_tests/reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/reduction_ops_test.py @@ -175,6 +175,24 @@ class SumReductionTest(BaseReductionTest): np_arr = self._makeIncremental((2,) * rank, dtypes.int32) self._compareAllAxes(np_arr) + def testFloat16(self): + for rank in range(1, _MAX_RANK + 1): + np_arr = self._makeIncremental((2,) * rank, dtypes.float16) + self._compareAllAxes(np_arr) + + # test that mean doesn't overflow + # only on GPU, since it has the more accurate implementation + if not test.is_gpu_available(): + return + + arr = np.ones([68000], dtype=np.float16) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_arr = array_ops.constant(arr) + tf_mean = math_ops.reduce_mean(tf_arr, 0, False) + tf_out_mean = sess.run(tf_mean) + self.assertAllClose(tf_out_mean, 1.) + def testFloat32(self): for rank in range(1, _MAX_RANK + 1): np_arr = self._makeIncremental((2,) * rank, dtypes.float32) @@ -523,7 +541,7 @@ class MinReductionTest(test.TestCase): def testFloatReduce3D(self): # Create a 3D array of floats and reduce across all possible # dimensions - np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float32) + np_arr = np.arange(1, 31).reshape([2, 3, 5]).astype(np.float32) self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) @@ -537,7 +555,7 @@ class MinReductionTest(test.TestCase): def testDoubleReduce3D(self): # Create a 3D array of doubles and reduce across all possible # dimensions - np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float64) + np_arr = np.arange(1, 31).reshape([2, 3, 5]).astype(np.float64) self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) @@ -629,7 +647,7 @@ class MaxReductionTest(test.TestCase): def testFloatReduce3D(self): # Create a 3D array of floats and reduce across all possible # dimensions - np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float32) + np_arr = np.arange(-31, -1).reshape([2, 3, 5]).astype(np.float32) self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) @@ -643,7 +661,7 @@ class MaxReductionTest(test.TestCase): def testDoubleReduce3D(self): # Create a 3D array of doubles and reduce across all possible # dimensions - np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float64) + np_arr = np.arange(-31, -1).reshape([2, 3, 5]).astype(np.float64) self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) @@ -656,7 +674,7 @@ class MaxReductionTest(test.TestCase): def testGradient(self): s = [2, 3, 4, 2] - x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) + x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) with self.test_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t, [1, 2]) @@ -666,7 +684,7 @@ class MaxReductionTest(test.TestCase): def testGradient2(self): s = [2, 3, 4, 2] - x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) + x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) with self.test_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t, [1]) @@ -676,7 +694,7 @@ class MaxReductionTest(test.TestCase): def testGradient3(self): s = [2, 3, 4, 2] - x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) + x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) with self.test_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t, [2]) @@ -686,7 +704,7 @@ class MaxReductionTest(test.TestCase): def testGradient4(self): s = [2, 3, 4, 2] - x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) + x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) with self.test_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t) diff --git a/tensorflow/python/kernel_tests/reduction_ops_test_big.py b/tensorflow/python/kernel_tests/reduction_ops_test_big.py new file mode 100644 index 0000000000..0959adb026 --- /dev/null +++ b/tensorflow/python/kernel_tests/reduction_ops_test_big.py @@ -0,0 +1,179 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for reduction ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class BaseReductionTest(test.TestCase): + + def _tf_reduce(self, x, reduction_axes, keep_dims): + raise NotImplementedError() + + +class BigReductionTest(BaseReductionTest): + """Test reductions for sum and boolean all over a wide range of shapes.""" + + def _tf_reduce_max(self, x, reduction_axes, keep_dims): + return math_ops.reduce_max(x, reduction_axes, keep_dims) + + def _tf_reduce_all(self, x, reduction_axes, keep_dims): + return math_ops.reduce_all(x, reduction_axes, keep_dims) + + def _tf_reduce_mean(self, x, reduction_axes, keep_dims): + return math_ops.reduce_mean(x, reduction_axes, keep_dims) + + def _tf_reduce_sum(self, x, reduction_axes, keep_dims): + return math_ops.reduce_sum(x, reduction_axes, keep_dims) + + def testFloat32Sum(self): + # make sure we test all possible kernel invocations + # logic is the same for all ops, test just float32 for brevity + arr_ = np.ones([4097, 4097], dtype=np.float32) + for size_x in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + for size_y in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + arr = arr_[0:size_x, 0:size_y] + col_sum = np.ones([size_y], dtype=np.float32) * size_x + row_sum = np.ones([size_x], dtype=np.float32) * size_y + full_sum = np.ones([], dtype=np.float32) * size_x * size_y + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_row_sum = self._tf_reduce_sum(arr, 1, False) + tf_col_sum = self._tf_reduce_sum(arr, 0, False) + tf_full_sum = self._tf_reduce_sum(arr, [0, 1], False) + tf_out_row, tf_out_col, tf_out_full = sess.run( + [tf_row_sum, tf_col_sum, tf_full_sum]) + self.assertAllClose(col_sum, tf_out_col) + self.assertAllClose(row_sum, tf_out_row) + self.assertAllClose(full_sum, tf_out_full) + + arr_ = np.ones([130, 130, 130], dtype=np.float32) + for size_x in range(1, 130, 13): + for size_y in range(1, 130, 13): + for size_z in range(1, 130, 13): + arr = arr_[0:size_x, 0:size_y, 0:size_z] + sum_y = np.ones([size_x, size_z], dtype=np.float32) + sum_xz = np.ones([size_y], dtype=np.float32) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_sum_xz = self._tf_reduce_mean(arr, [0, 2], False) + tf_sum_y = self._tf_reduce_mean(arr, 1, False) + tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y]) + self.assertAllClose(sum_y, tf_out_sum_y) + self.assertAllClose(sum_xz, tf_out_sum_xz) + + def testFloat32Max(self): + # make sure we test all possible kernel invocations + # logic is the same for all ops, test just float32 for brevity + arr_ = np.random.uniform( + low=-3, high=-1, size=[4105, 4105]).astype(np.float32) + for size_x in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + for size_y in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + arr = arr_[0:size_x, 0:size_y] + col_max = np.max(arr, axis=0) + row_max = np.max(arr, axis=1) + full_max = np.max(col_max) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_row_max = self._tf_reduce_max(arr, 1, False) + tf_col_max = self._tf_reduce_max(arr, 0, False) + tf_full_max = self._tf_reduce_max(arr, [0, 1], False) + tf_out_row, tf_out_col, tf_out_full = sess.run( + [tf_row_max, tf_col_max, tf_full_max]) + self.assertAllClose(col_max, tf_out_col) + self.assertAllClose(row_max, tf_out_row) + self.assertAllClose(full_max, tf_out_full) + + arr_ = np.random.uniform( + low=-3, high=-1, size=[130, 130, 130]).astype(np.float32) + for size_x in range(1, 130, 13): + for size_y in range(1, 130, 13): + for size_z in range(1, 130, 13): + arr = arr_[0:size_x, 0:size_y, 0:size_z] + sum_y = np.max(arr, axis=1) + sum_xz = np.max(arr, axis=(0, 2)) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_sum_xz = self._tf_reduce_max(arr, [0, 2], False) + tf_sum_y = self._tf_reduce_max(arr, 1, False) + tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y]) + self.assertAllClose(sum_y, tf_out_sum_y) + self.assertAllClose(sum_xz, tf_out_sum_xz) + + def testBooleanAll(self): + # make sure we test all possible kernel invocations + # test operation where T(0) is not the identity + arr_ = np.ones([4097, 4097], dtype=np.bool) + for size_x in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + for size_y in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + arr = arr_[0:size_x, 0:size_y] + col_sum = np.ones([size_y], dtype=np.bool) + row_sum = np.ones([size_x], dtype=np.bool) + full_sum = np.ones([1], dtype=np.bool).reshape([]) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_row_sum = self._tf_reduce_all(arr, 1, False) + tf_col_sum = self._tf_reduce_all(arr, 0, False) + tf_full_sum = self._tf_reduce_all(arr, [0, 1], False) + tf_out_row, tf_out_col, tf_out_full = sess.run( + [tf_row_sum, tf_col_sum, tf_full_sum]) + self.assertAllClose(col_sum, tf_out_col) + self.assertAllClose(row_sum, tf_out_row) + self.assertAllClose(full_sum, tf_out_full) + + arr_ = np.ones([130, 130, 130], dtype=np.bool) + for size_x in range(1, 130, 13): + for size_y in range(1, 130, 13): + for size_z in range(1, 130, 13): + arr = arr_[0:size_x, 0:size_y, 0:size_z] + sum_y = np.ones([size_x, size_z], dtype=np.bool) + sum_xz = np.ones([size_y], dtype=np.bool) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_sum_xz = self._tf_reduce_all(arr, [0, 2], False) + tf_sum_y = self._tf_reduce_all(arr, 1, False) + tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y]) + self.assertAllClose(sum_y, tf_out_sum_y) + self.assertAllClose(sum_xz, tf_out_sum_xz) + + +if __name__ == "__main__": + test.main() -- GitLab From a4fe92514fab8d75d6acf5f593cf7e7c8cea919f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 15:56:58 -0700 Subject: [PATCH 177/294] Implement reduce_min and reduce_max gradients. Closes #12715 PiperOrigin-RevId: 167335900 --- tensorflow/cc/gradients/math_grad.cc | 66 +++++++++++++++++++++++ tensorflow/cc/gradients/math_grad_test.cc | 49 +++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index 09a15fbe5f..d90654f2e9 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -687,6 +687,72 @@ Status MeanGrad(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("Mean", MeanGrad); +Status MinOrMaxGrad(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + // The partial derivative for any input along a "reduced" dimension + // is 1 when it is the min (or max) and 0 everywhere else. So the + // gradient calculation is identical for both operators. + // + // There's a special case for propagating gradients when there are + // multiple minima (or maxima) - we choose to divide the gradient + // equally among all matching inputs. + // + // Please note this comment + // https://github.com/tensorflow/tensorflow/issues/4886#issuecomment-256836063 + // for details. + + // Running example: + // input: [[5, 5, 5], + // [1, 2, -3]] + // reduction_indices: [1] + auto input = op.input(0); + auto reduction_indices = op.input(1); + + // [2, 3] + auto input_shape = Shape(scope, input); + + // [2, 1] + auto output_shape_kept_dims = + ReducedShapeHelper(scope, input_shape, reduction_indices); + + // for op=min (say) + // output = [5, -3] + // y = [[5], + // [-3]] + auto y = Reshape(scope, op.output(0), output_shape_kept_dims); + + // reshape([g1, g2], [2, 1]) = [[g1], + // [g2]] + auto grad = Reshape(scope, grad_inputs[0], output_shape_kept_dims); + + // indicators = equal(y, input) + // = equal([[5], [[5, 5, 5], + // [-3]], [1, 2, -3]]) + // = [[1, 1, 1], + // [0, 0, 1]] + auto indicators = Cast(scope, Equal(scope, y, input), grad_inputs[0].type()); + + // [[3], + // [1]] + auto num_selected = Reshape(scope, Sum(scope, indicators, reduction_indices), + output_shape_kept_dims); + + // [[1/3, 1/3, 1/3], + // [0, 0, 1]] + auto scale = Div(scope, indicators, num_selected); + + // [[g1/3, g1/3, g1/3], + // [0, 0, g2]] + grad_outputs->push_back(Mul(scope, scale, grad)); + + // Stop propagation along reduction_indices + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("Min", MinOrMaxGrad); +REGISTER_GRADIENT_OP("Max", MinOrMaxGrad); + // MatMulGrad helper function used to compute two MatMul operations // based on input matrix transposition combinations. Status MatMulGradHelper(const Scope& scope, const bool is_batch, diff --git a/tensorflow/cc/gradients/math_grad_test.cc b/tensorflow/cc/gradients/math_grad_test.cc index 62b59b25c7..5b1558dd82 100644 --- a/tensorflow/cc/gradients/math_grad_test.cc +++ b/tensorflow/cc/gradients/math_grad_test.cc @@ -955,6 +955,55 @@ TEST_F(NaryGradTest, Mean) { RunTest({x}, {x_shape}, {y}, {y_shape}); } +TEST_F(NaryGradTest, Min) { + TensorShape x_shape({2, 3}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto y = Min(scope_, x, {-1}); + // y's shape is the result of reducing x along axes -1 (= 1) + TensorShape y_shape({2}); + Tensor x_init_value = + test::AsTensor({0.5f, 0.7f, 0.2f, 1.0f, 1.5f, -2.8f}, x_shape); + RunTest(x, x_init_value, y, y_shape); +} + +TEST_F(NaryGradTest, Max) { + TensorShape x_shape({2, 3}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto y = Max(scope_, x, {-1}); + // y's shape is the result of reducing x along axes -1 (= 1) + TensorShape y_shape({2}); + Tensor x_init_value = + test::AsTensor({0.5f, 0.7f, 0.2f, 1.0f, 1.5f, -2.8f}, x_shape); + RunTest(x, x_init_value, y, y_shape); +} + +TEST_F(NaryGradTest, MinMulti) { + // Test gradient when there are multiple minima. + // Note that we cannot directly use a test Tensor with multiple + // minima, as the numeric estimator will calculate incorrect + // gradients when perturbing each entry in the Tensor (which then + // changes how many minima exist.) + // Instead, we use a single input that broadcast-multiplies a larger + // tensor with equal values, and apply reduce_min to the multiplied + // result. + TensorShape x_shape({1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto all_same = Mul(scope_, Const(scope_, {1.f, 1.f, 1.f}), x); + auto y = Min(scope_, all_same, {0}); + // y is a [3] shaped tensor reduced along dimension 0, so it is [1] shaped + TensorShape y_shape({1}); + RunTest({x}, {x_shape}, {y}, {y_shape}); +} + +TEST_F(NaryGradTest, MaxMulti) { + TensorShape x_shape({1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto all_same = Mul(scope_, Const(scope_, {1.f, 1.f, 1.f}), x); + auto y = Max(scope_, all_same, {0}); + TensorShape y_shape({1}); + RunTest({x}, {x_shape}, {y}, {y_shape}); +} + TEST_F(NaryGradTest, AddN) { TensorShape shape({3, 2, 5}); std::vector xs; -- GitLab From 25b301d2f80c76055f2ab4dc6ddb9cab0ab62cef Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 1 Sep 2017 16:20:38 -0700 Subject: [PATCH 178/294] Automated g4 rollback of changelist 167318475 PiperOrigin-RevId: 167338828 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 0578ae8b0f..6748a76562 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -68,11 +68,12 @@ def _create_global_step(graph): return variable_scope.get_variable( ops.GraphKeys.GLOBAL_STEP, shape=[], - dtype=dtypes.int64, + dtype=dtypes.int32, initializer=init_ops.zeros_initializer(), trainable=False, use_resource=True, - collections=[ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.GLOBAL_STEP]) + collections=[ops.GraphKeys.GLOBAL_VARIABLES, + ops.GraphKeys.GLOBAL_STEP]) def _sync_variables_ops(): -- GitLab From e09619b923f3e8ac633fd6beaa96f2298988c355 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 1 Sep 2017 16:23:01 -0700 Subject: [PATCH 179/294] Increase the size of the flaky sample_stats_test.py PiperOrigin-RevId: 167339080 --- tensorflow/contrib/distributions/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index c78b064b4f..c2b99d67c7 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -341,7 +341,7 @@ cuda_py_test( cuda_py_test( name = "sample_stats_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/sample_stats_test.py"], additional_deps = [ ":distributions_py", -- GitLab From b541b4812289b19cd4d4ba68916ab386878f8f17 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 16:31:36 -0700 Subject: [PATCH 180/294] Update CUB to 1.7.3 PiperOrigin-RevId: 167339917 --- tensorflow/contrib/cmake/external/cub.cmake | 4 ++-- tensorflow/workspace.bzl | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/cmake/external/cub.cmake b/tensorflow/contrib/cmake/external/cub.cmake index 477572d588..d98579d207 100644 --- a/tensorflow/contrib/cmake/external/cub.cmake +++ b/tensorflow/contrib/cmake/external/cub.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(cub_URL http://mirror.bazel.build/github.com/NVlabs/cub/archive/69ceda618313df8e9cac6659d607b08949455d14.tar.gz) -set(cub_HASH SHA256=87e856522c283b8ea887c3b61d7d5b252d2dd74abac4f1d756d776e721223e82) +set(cub_URL http://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.3.zip) +set(cub_HASH SHA256=b7ead9e291d34ffa8074243541c1380d63be63f88de23de8ee548db573b72ebe) set(cub_BUILD ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_ARCHIVE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/cub_archive) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2c7acd809a..646459646d 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -683,11 +683,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.new_http_archive( name = "cub_archive", urls = [ - "http://mirror.bazel.build/github.com/NVlabs/cub/archive/69ceda618313df8e9cac6659d607b08949455d14.tar.gz", - "https://github.com/NVlabs/cub/archive/69ceda618313df8e9cac6659d607b08949455d14.tar.gz", + "http://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.3.zip", + "https://github.com/NVlabs/cub/archive/1.7.3.zip", ], - sha256 = "87e856522c283b8ea887c3b61d7d5b252d2dd74abac4f1d756d776e721223e82", - strip_prefix = "cub-69ceda618313df8e9cac6659d607b08949455d14", + sha256 = "b7ead9e291d34ffa8074243541c1380d63be63f88de23de8ee548db573b72ebe", + strip_prefix = "cub-1.7.3", build_file = str(Label("//third_party:cub.BUILD")), ) -- GitLab From e83d8ab0959b6a0e50d14acbfd39c5ab79e449d5 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 1 Sep 2017 16:33:24 -0700 Subject: [PATCH 181/294] Contrib ops and kernels for summary ops which write without touching python. PiperOrigin-RevId: 167340103 --- tensorflow/BUILD | 1 + tensorflow/contrib/BUILD | 1 + tensorflow/contrib/summary/BUILD | 59 +++ tensorflow/contrib/summary/summary_ops.py | 159 +++++++ .../contrib/summary/summary_ops_test.py | 52 +++ tensorflow/core/BUILD | 2 + tensorflow/core/kernels/BUILD | 39 ++ tensorflow/core/kernels/summary_interface.cc | 432 ++++++++++++++++++ tensorflow/core/kernels/summary_interface.h | 59 +++ .../core/kernels/summary_interface_test.cc | 167 +++++++ tensorflow/core/kernels/summary_kernels.cc | 226 +++++++++ tensorflow/core/ops/summary_ops.cc | 218 +++++++++ tensorflow/python/eager/context.py | 28 -- tensorflow/python/eager/core_test.py | 7 +- 14 files changed, 1416 insertions(+), 34 deletions(-) create mode 100644 tensorflow/contrib/summary/BUILD create mode 100644 tensorflow/contrib/summary/summary_ops.py create mode 100644 tensorflow/contrib/summary/summary_ops_test.py create mode 100644 tensorflow/core/kernels/summary_interface.cc create mode 100644 tensorflow/core/kernels/summary_interface.h create mode 100644 tensorflow/core/kernels/summary_interface_test.cc create mode 100644 tensorflow/core/kernels/summary_kernels.cc create mode 100644 tensorflow/core/ops/summary_ops.cc diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 9faa0964ca..2544e26b8e 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -336,6 +336,7 @@ filegroup( "//tensorflow/contrib/staging:all_files", "//tensorflow/contrib/stat_summarizer:all_files", "//tensorflow/contrib/stateless:all_files", + "//tensorflow/contrib/summary:all_files", "//tensorflow/contrib/tensor_forest:all_files", "//tensorflow/contrib/tensor_forest/hybrid:all_files", "//tensorflow/contrib/tensor_forest/kernels/v4:all_files", diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index d9f2b84346..84fcc0d014 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -73,6 +73,7 @@ py_library( "//tensorflow/contrib/staging", "//tensorflow/contrib/stat_summarizer:stat_summarizer_py", "//tensorflow/contrib/stateless", + "//tensorflow/contrib/summary:summary_ops", "//tensorflow/contrib/tensor_forest:init_py", "//tensorflow/contrib/tensorboard", "//tensorflow/contrib/testing:testing_py", diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD new file mode 100644 index 0000000000..bc30502264 --- /dev/null +++ b/tensorflow/contrib/summary/BUILD @@ -0,0 +1,59 @@ +licenses(["notice"]) # Apache 2.0 + +exports_files([ + "LICENSE", +]) + +load( + "//tensorflow:tensorflow.bzl", + "py_test", + "tf_gen_op_wrapper_py", +) + +tf_gen_op_wrapper_py( + name = "gen_summary_ops", + out = "gen_summary_ops.py", + deps = ["//tensorflow/core:summary_ops_op_lib"], +) + +py_test( + name = "summary_ops_test", + srcs = ["summary_ops_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":summary_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform", + "//tensorflow/python:training", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + ], +) + +py_library( + name = "summary_ops", + srcs = ["summary_ops.py"], + srcs_version = "PY2AND3", + visibility = ["//tensorflow:internal"], + deps = [ + ":gen_summary_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:summary_op_util", + "//tensorflow/python:training", + "//tensorflow/python/eager:context", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py new file mode 100644 index 0000000000..05e627adf1 --- /dev/null +++ b/tensorflow/contrib/summary/summary_ops.py @@ -0,0 +1,159 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Operations to emit summaries.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.summary import gen_summary_ops +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import summary_op_util +from tensorflow.python.training import training_util + + +# Name for a collection which is expected to have at most a single boolean +# Tensor. If this tensor is True the summary ops will record summaries. +_SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries" + + +def should_record_summaries(): + """Returns boolean Tensor which is true if summaries should be recorded.""" + should_record_collection = ops.get_collection(_SHOULD_RECORD_SUMMARIES_NAME) + if not should_record_collection: + return constant_op.constant(False) + if len(should_record_collection) != 1: + raise ValueError( + "More than one tensor specified for whether summaries " + "should be recorded: %s" % should_record_collection) + return should_record_collection[0] + + +# TODO(apassos) consider how to handle local step here. +def record_summaries_every_n_global_steps(n): + """Sets the should_record_summaries Tensor to true if global_step % n == 0.""" + collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) + collection_ref[:] = [training_util.get_global_step() % n == 0] + + +def always_record_summaries(): + """Sets the should_record_summaries Tensor to always true.""" + collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) + collection_ref[:] = [constant_op.constant(True)] + + +def never_record_summaries(): + """Sets the should_record_summaries Tensor to always false.""" + collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) + collection_ref[:] = [constant_op.constant(False)] + + +def create_summary_file_writer(logdir, + max_queue=None, + flush_secs=None, + filename_suffix=None): + """Creates a summary file writer in the current context.""" + if max_queue is None: + max_queue = constant_op.constant(10) + if flush_secs is None: + flush_secs = constant_op.constant(120) + if filename_suffix is None: + filename_suffix = constant_op.constant("") + resource = gen_summary_ops.summary_writer() + gen_summary_ops.create_summary_file_writer(resource, logdir, max_queue, + flush_secs, filename_suffix) + context.context().summary_writer_resource = resource + + +def _nothing(): + """Convenient else branch for when summaries do not record.""" + return + + +def generic(name, tensor, metadata, family=None): + """Writes a tensor summary if possible.""" + + def record(): + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_summary(context.context().summary_writer_resource, + training_util.get_global_step(), tensor, + tag, metadata, name=scope) + return control_flow_ops.cond(should_record_summaries(), record, _nothing) + + +def scalar(name, tensor, family=None): + """Writes a scalar summary if possible.""" + + def record(): + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_scalar_summary( + context.context().summary_writer_resource, + training_util.get_global_step(), tag, tensor, name=scope) + + return control_flow_ops.cond(should_record_summaries(), record, _nothing) + + +def histogram(name, tensor, family=None): + """Writes a histogram summary if possible.""" + + def record(): + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_histogram_summary( + context.context().summary_writer_resource, + training_util.get_global_step(), tag, tensor, name=scope) + + return control_flow_ops.cond(should_record_summaries(), record, _nothing) + + +def image(name, tensor, bad_color=None, max_images=3, family=None): + """Writes an image summary if possible.""" + + def record(): + if bad_color is None: + bad_color_ = constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8) + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_image_summary( + context.context().summary_writer_resource, + training_util.get_global_step(), tag, tensor, bad_color_, max_images, + name=scope) + + return control_flow_ops.cond(should_record_summaries(), record, _nothing) + + +def audio(name, tensor, sample_rate, max_outputs, family=None): + """Writes an audio summary if possible.""" + + def record(): + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_audio_summary( + context.context().summary_writer_resource, + training_util.get_global_step(), + tag, + tensor, + sample_rate=sample_rate, + max_outputs=max_outputs, + name=scope) + + return control_flow_ops.cond(should_record_summaries(), record, _nothing) diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py new file mode 100644 index 0000000000..56c1a16f7f --- /dev/null +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -0,0 +1,52 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile + +from tensorflow.contrib.summary import summary_ops +from tensorflow.python.eager import test +from tensorflow.python.framework import test_util +from tensorflow.python.platform import gfile +from tensorflow.python.training import training_util + + +class TargetTest(test_util.TensorFlowTestCase): + + def testShouldRecordSummary(self): + self.assertFalse(summary_ops.should_record_summaries().numpy()) + summary_ops.always_record_summaries() + self.assertTrue(summary_ops.should_record_summaries().numpy()) + + def testSummaryOps(self): + training_util.get_or_create_global_step() + logdir = tempfile.mkdtemp() + summary_ops.create_summary_file_writer(logdir, max_queue=0) + summary_ops.always_record_summaries() + summary_ops.generic('tensor', 1, '') + summary_ops.scalar('scalar', 2.0) + summary_ops.histogram('histogram', [1.0]) + summary_ops.image('image', [[[[1.0]]]]) + summary_ops.audio('audio', [[1.0]], 1.0, 1) + # The working condition of the ops is tested in the C++ test so we just + # test here that we're calling them correctly. + self.assertTrue(gfile.Exists(logdir)) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d01f7843ee..35394eeb87 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -566,6 +566,7 @@ tf_gen_op_libs( "state_ops", "stateless_random_ops", "string_ops", + "summary_ops", "training_ops", ], ) @@ -776,6 +777,7 @@ cc_library( "//tensorflow/core/kernels:state", "//tensorflow/core/kernels:stateless_random_ops", "//tensorflow/core/kernels:string", + "//tensorflow/core/kernels:summary_kernels", "//tensorflow/core/kernels:training_ops", "//tensorflow/core/kernels:word2vec_kernels", ] + tf_additional_cloud_kernel_deps() + if_not_windows([ diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index cb7e3b3316..174ccde8b7 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4665,6 +4665,8 @@ filegroup( "whole_file_read_ops.*", "sample_distorted_bounding_box_op.*", "ctc_loss_op.*", + "summary_interface.*", + "summary_kernels.*", "spectrogram_convert_test_data.cc", "sql_dataset_ops.cc", # Excluded due to experimental status: @@ -5954,6 +5956,43 @@ tf_kernel_library( ], ) +cc_library( + name = "summary_interface", + srcs = ["summary_interface.cc"], + hdrs = ["summary_interface.h"], + deps = [ + "//tensorflow/compiler/xla:util", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:proto_text", + "//tensorflow/core:protos_all_cc", + ], +) + +cc_test( + name = "summary_interface_test", + srcs = ["summary_interface_test.cc"], + deps = [ + ":summary_interface", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_kernel_library( + name = "summary_kernels", + srcs = ["summary_kernels.cc"], + deps = [ + ":summary_interface", + "//tensorflow/core:framework", + "//tensorflow/core:summary_ops_op_lib", + ], +) + # ----------------------------------------------------------------------------- # Google-internal targets. These must be at the end for syncrepo. diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc new file mode 100644 index 0000000000..19e0f702f9 --- /dev/null +++ b/tensorflow/core/kernels/summary_interface.cc @@ -0,0 +1,432 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/summary_interface.h" +#include "tensorflow/core/lib/histogram/histogram.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/png/png_io.h" +#include "tensorflow/core/lib/wav/wav_io.h" +#include "tensorflow/core/util/event.pb.h" +#include "tensorflow/core/util/events_writer.h" + +namespace tensorflow { +namespace { +template +Status TensorValueAt(Tensor t, int index, T* out) { + switch (t.dtype()) { + case DT_FLOAT: + *out = t.flat()(index); + break; + case DT_DOUBLE: + *out = t.flat()(index); + break; + case DT_HALF: + *out = T(t.flat()(index)); + break; + case DT_INT32: + *out = t.flat()(index); + break; + case DT_UINT8: + *out = t.flat()(index); + break; + case DT_INT16: + *out = t.flat()(index); + break; + case DT_INT8: + *out = t.flat()(index); + break; + case DT_BOOL: + *out = t.flat()(index); + break; + case DT_INT64: + *out = t.flat()(index); + break; + default: + return errors::Unimplemented("Scalar summary for dtype ", + DataTypeString(t.dtype()), + " is not supported."); + } + return Status::OK(); +} + +typedef Eigen::Tensor Uint8Image; + +// Add the sequence of images specified by ith_image to the summary. +// +// Factoring this loop out into a helper function lets ith_image behave +// differently in the float and uint8 cases: the float case needs a temporary +// buffer which can be shared across calls to ith_image, but the uint8 case +// does not. +Status AddImages(const string& tag, int max_images, int batch_size, int w, + int h, int depth, + const std::function& ith_image, Summary* s) { + const int N = std::min(max_images, batch_size); + for (int i = 0; i < N; ++i) { + Summary::Value* v = s->add_value(); + // The tag depends on the number of requested images (not the number + // produced.) + // + // Note that later on avisu uses "/" to figure out a consistent naming + // convention for display, so we append "/image" to guarantee that the + // image(s) won't be displayed in the global scope with no name. + if (max_images > 1) { + v->set_tag(strings::StrCat(tag, "/image/", i)); + } else { + v->set_tag(strings::StrCat(tag, "/image")); + } + + auto image = ith_image(i); + Summary::Image* si = v->mutable_image(); + si->set_height(h); + si->set_width(w); + si->set_colorspace(depth); + const int channel_bits = 8; + const int compression = -1; // Use zlib default + if (!png::WriteImageToBuffer(image.data(), w, h, w * depth, depth, + channel_bits, compression, + si->mutable_encoded_image_string(), nullptr)) { + return errors::Internal("PNG encoding failed"); + } + } + return Status::OK(); +} + +template +void NormalizeFloatImage(int hw, int depth, + typename TTypes::ConstMatrix values, + typename TTypes::ConstVec bad_color, + Uint8Image* image) { + if (!image->size()) return; // Nothing to do for empty images + + // Rescale the image to uint8 range. + // + // We are trying to generate an RGB image from a float/half tensor. We do + // not have any info about the expected range of values in the tensor + // but the generated image needs to have all RGB values within [0, 255]. + // + // We use two different algorithms to generate these values. If the + // tensor has only positive values we scale them all by 255/max(values). + // If the tensor has both negative and positive values we scale them by + // the max of their absolute values and center them around 127. + // + // This works for most cases, but does not respect the relative dynamic + // range across different instances of the tensor. + + // Compute min and max ignoring nonfinite pixels + float image_min = std::numeric_limits::infinity(); + float image_max = -image_min; + for (int i = 0; i < hw; i++) { + bool finite = true; + for (int j = 0; j < depth; j++) { + if (!Eigen::numext::isfinite(values(i, j))) { + finite = false; + break; + } + } + if (finite) { + for (int j = 0; j < depth; j++) { + float value(values(i, j)); + image_min = std::min(image_min, value); + image_max = std::max(image_max, value); + } + } + } + + // Pick an affine transform into uint8 + const float kZeroThreshold = 1e-6; + T scale, offset; + if (image_min < 0) { + float max_val = std::max(std::abs(image_min), std::abs(image_max)); + scale = T(max_val < kZeroThreshold ? 0.0f : 127.0f / max_val); + offset = T(128.0f); + } else { + scale = T(image_max < kZeroThreshold ? 0.0f : 255.0f / image_max); + offset = T(0.0f); + } + + // Transform image, turning nonfinite values to bad_color + for (int i = 0; i < hw; i++) { + bool finite = true; + for (int j = 0; j < depth; j++) { + if (!Eigen::numext::isfinite(values(i, j))) { + finite = false; + break; + } + } + if (finite) { + image->chip<0>(i) = + (values.template chip<0>(i) * scale + offset).template cast(); + } else { + image->chip<0>(i) = bad_color; + } + } +} + +template +Status NormalizeAndAddImages(const Tensor& tensor, int max_images, int h, int w, + int hw, int depth, int batch_size, + const string& base_tag, Tensor bad_color_tensor, + Summary* s) { + // For float and half images, nans and infs are replaced with bad_color. + if (bad_color_tensor.dim_size(0) < depth) { + return errors::InvalidArgument( + "expected depth <= bad_color.size, got depth = ", depth, + ", bad_color.size = ", bad_color_tensor.dim_size(0)); + } + auto bad_color_full = bad_color_tensor.vec(); + typename TTypes::ConstVec bad_color(bad_color_full.data(), depth); + + // Float images must be scaled and translated. + Uint8Image image(hw, depth); + auto ith_image = [&tensor, &image, bad_color, batch_size, hw, depth](int i) { + auto tensor_eigen = tensor.template shaped({batch_size, hw, depth}); + typename TTypes::ConstMatrix values( + &tensor_eigen(i, 0, 0), Eigen::DSizes(hw, depth)); + NormalizeFloatImage(hw, depth, values, bad_color, &image); + return image; + }; + return AddImages(base_tag, max_images, batch_size, w, h, depth, ith_image, s); +} + +} // namespace + +class SummaryWriterImpl : public SummaryWriterInterface { + public: + SummaryWriterImpl(int max_queue, int flush_millis) + : SummaryWriterInterface(), + max_queue_(max_queue), + flush_millis_(flush_millis) {} + + Status Initialize(const string& logdir, const string& filename_suffix, + Env* env) { + Status is_dir = env->IsDirectory(logdir); + if (!is_dir.ok()) { + if (is_dir.code() != tensorflow::error::NOT_FOUND) { + return is_dir; + } + TF_RETURN_IF_ERROR(env->CreateDir(logdir)); + } + mutex_lock ml(mu_); + events_writer_ = + xla::MakeUnique(io::JoinPath(logdir, "events")); + if (!events_writer_->InitWithSuffix(filename_suffix)) { + return errors::Unknown("Could not initialize events writer."); + } + last_flush_ = Env::Default()->NowMicros(); + return Status::OK(); + } + + Status Flush() override { + mutex_lock ml(mu_); + return InternalFlush(); + } + + ~SummaryWriterImpl() override { + (void)Flush(); // Ignore errors. + } + + Status WriteTensor(int64 global_step, Tensor t, const string& tag, + const string& serialized_metadata) override { + Summary s; + Summary::Value* v = s.add_value(); + t.AsProtoTensorContent(v->mutable_tensor()); + v->set_tag(tag); + v->mutable_metadata()->ParseFromString(serialized_metadata); + return Enqueue(global_step, s); + } + + Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { + Summary s; + Summary::Value* v = s.add_value(); + v->set_tag(tag); + float value; + TF_RETURN_IF_ERROR(TensorValueAt(t, 0, &value)); + v->set_simple_value(value); + return Enqueue(global_step, s); + } + + Status WriteHistogram(int64 global_step, Tensor t, + const string& tag) override { + Summary s; + Summary::Value* v = s.add_value(); + v->set_tag(tag); + histogram::Histogram histo; + for (int64 i = 0; i < t.NumElements(); i++) { + double double_val; + TF_RETURN_IF_ERROR(TensorValueAt(t, i, &double_val)); + if (Eigen::numext::isnan(double_val)) { + return errors::InvalidArgument("Nan in summary histogram for: ", tag); + } else if (Eigen::numext::isinf(double_val)) { + return errors::InvalidArgument("Infinity in summary histogram for: ", + tag); + } + histo.Add(double_val); + } + + histo.EncodeToProto(v->mutable_histo(), false /* Drop zero buckets */); + return Enqueue(global_step, s); + } + + Status WriteImage(int64 global_step, Tensor tensor, const string& tag, + int max_images, Tensor bad_color) override { + if (!(tensor.dims() == 4 && + (tensor.dim_size(3) == 1 || tensor.dim_size(3) == 3 || + tensor.dim_size(3) == 4))) { + return errors::InvalidArgument( + "Tensor must be 4-D with last dim 1, 3, or 4, not ", + tensor.shape().DebugString()); + } + if (!(tensor.dim_size(0) < (1LL << 31) && + tensor.dim_size(1) < (1LL << 31) && + tensor.dim_size(2) < (1LL << 31) && + (tensor.dim_size(1) * tensor.dim_size(2)) < (1LL << 29))) { + return errors::InvalidArgument("Tensor too large for summary ", + tensor.shape().DebugString()); + } + Summary s; + // The casts and h * w cannot overflow because of the limits above. + const int batch_size = static_cast(tensor.dim_size(0)); + const int h = static_cast(tensor.dim_size(1)); + const int w = static_cast(tensor.dim_size(2)); + const int hw = h * w; // Compact these two dims for simplicity + const int depth = static_cast(tensor.dim_size(3)); + if (tensor.dtype() == DT_UINT8) { + // For uint8 input, no normalization is necessary + auto ith_image = [&tensor, batch_size, hw, depth](int i) { + auto values = tensor.shaped({batch_size, hw, depth}); + return typename TTypes::ConstMatrix( + &values(i, 0, 0), Eigen::DSizes(hw, depth)); + }; + TF_RETURN_IF_ERROR( + AddImages(tag, max_images, batch_size, w, h, depth, ith_image, &s)); + } else if (tensor.dtype() == DT_HALF) { + TF_RETURN_IF_ERROR(NormalizeAndAddImages( + tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, &s)); + } else if (tensor.dtype() == DT_FLOAT) { + TF_RETURN_IF_ERROR(NormalizeAndAddImages( + tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, &s)); + } else { + return errors::InvalidArgument( + "Only DT_INT8, DT_HALF, and DT_FLOAT images are supported. Got ", + DataTypeString(tensor.dtype())); + } + + return Enqueue(global_step, s); + } + + Status WriteAudio(int64 global_step, Tensor tensor, const string& tag, + int max_outputs, float sample_rate) override { + if (sample_rate <= 0.0f) { + return errors::InvalidArgument("sample_rate must be > 0"); + } + const int batch_size = tensor.dim_size(0); + const int64 length_frames = tensor.dim_size(1); + const int64 num_channels = + tensor.dims() == 2 ? 1 : tensor.dim_size(tensor.dims() - 1); + Summary s; + const int N = std::min(max_outputs, batch_size); + for (int i = 0; i < N; ++i) { + Summary::Value* v = s.add_value(); + if (max_outputs > 1) { + v->set_tag(strings::StrCat(tag, "/audio/", i)); + } else { + v->set_tag(strings::StrCat(tag, "/audio")); + } + + Summary::Audio* sa = v->mutable_audio(); + sa->set_sample_rate(sample_rate); + sa->set_num_channels(num_channels); + sa->set_length_frames(length_frames); + sa->set_content_type("audio/wav"); + + auto values = + tensor.shaped({batch_size, length_frames, num_channels}); + auto channels_by_frames = typename TTypes::ConstMatrix( + &values(i, 0, 0), + Eigen::DSizes(length_frames, num_channels)); + size_t sample_rate_truncated = lrintf(sample_rate); + if (sample_rate_truncated == 0) { + sample_rate_truncated = 1; + } + TF_RETURN_IF_ERROR(wav::EncodeAudioAsS16LEWav( + channels_by_frames.data(), sample_rate_truncated, num_channels, + length_frames, sa->mutable_encoded_audio_string())); + } + + return Enqueue(global_step, s); + } + + string DebugString() override { return "SummaryWriterImpl"; } + + private: + Status Enqueue(int64 global_step, const Summary& summary) { + mutex_lock ml(mu_); + queue_.emplace_back(global_step, summary, Env::Default()->NowMicros()); + if (queue_.size() >= max_queue_ || + Env::Default()->NowMicros() - last_flush_ > 1000 * flush_millis_) { + return InternalFlush(); + } + return Status::OK(); + } + + Status InternalFlush() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + for (const EventInfo& e : queue_) { + Event event; + event.set_step(std::get<0>(e)); + *event.mutable_summary() = std::get<1>(e); + event.set_wall_time(std::get<2>(e)); + events_writer_->WriteEvent(event); + } + queue_.clear(); + if (!events_writer_->Flush()) { + return errors::InvalidArgument("Could not flush events file."); + } + last_flush_ = Env::Default()->NowMicros(); + return Status::OK(); + } + + const int max_queue_; + const int flush_millis_; + uint64 last_flush_; + using EventInfo = std::tuple; + mutex mu_; + std::vector queue_ GUARDED_BY(mu_); + // A pointer to allow deferred construction. + std::unique_ptr events_writer_ GUARDED_BY(mu_); + std::vector> registered_summaries_ + GUARDED_BY(mu_); +}; + +Status CreateSummaryWriter(int max_queue, int flush_millis, + const string& logdir, const string& filename_suffix, + Env* env, SummaryWriterInterface** result) { + SummaryWriterImpl* w = new SummaryWriterImpl(max_queue, flush_millis); + Status s = w->Initialize(logdir, filename_suffix, env); + if (!s.ok()) { + w->Unref(); + *result = nullptr; + return s; + } + *result = w; + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/summary_interface.h b/tensorflow/core/kernels/summary_interface.h new file mode 100644 index 0000000000..b0861d218e --- /dev/null +++ b/tensorflow/core/kernels/summary_interface.h @@ -0,0 +1,59 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_KERNELS_SUMMARY_INTERFACE_H_ +#define TENSORFLOW_CORE_KERNELS_SUMMARY_INTERFACE_H_ + + +#include "tensorflow/core/framework/resource_mgr.h" + +namespace tensorflow { + +// Main interface for the summary writer resource. +class SummaryWriterInterface : public ResourceBase { + public: + virtual ~SummaryWriterInterface() override {} + + // Flushes all unwritten messages in the queue. + virtual Status Flush() = 0; + + // These are called in the OpKernel::Compute methods for the summary ops. + virtual Status WriteTensor(int64 global_step, Tensor t, const string& tag, + const string& serialized_metadata) = 0; + + virtual Status WriteScalar(int64 global_step, Tensor t, + const string& tag) = 0; + + virtual Status WriteHistogram(int64 global_step, Tensor t, + const string& tag) = 0; + + virtual Status WriteImage(int64 global_step, Tensor t, const string& tag, + int max_images, Tensor bad_color) = 0; + + virtual Status WriteAudio(int64 global_step, Tensor t, const string& tag, + int max_outputs_, float sample_rate) = 0; +}; + +// Creates a SummaryWriterInterface instance which writes to a file. It will +// enqueue up to max_queue summaries, and flush at least every flush_millis +// milliseconds. The summaries will be written to the directory specified by +// logdir and with the filename suffixed by filename_suffix. The caller ows a +// reference to result if the returned status is ok. +Status CreateSummaryWriter(int max_queue, int flush_millis, + const string& logdir, const string& filename_suffix, + Env* env, SummaryWriterInterface** result); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_SUMMARY_INTERFACE_H_ diff --git a/tensorflow/core/kernels/summary_interface_test.cc b/tensorflow/core/kernels/summary_interface_test.cc new file mode 100644 index 0000000000..66bde2cb06 --- /dev/null +++ b/tensorflow/core/kernels/summary_interface_test.cc @@ -0,0 +1,167 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/kernels/summary_interface.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/refcount.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/util/event.pb.h" + +namespace tensorflow { +namespace { + +Status SummaryTestHelper( + const string& test_name, + std::function writer_fn, + std::function test_fn) { + SummaryWriterInterface* writer; + Env* env = Env::Default(); + TF_CHECK_OK( + CreateSummaryWriter(1, 1, testing::TmpDir(), test_name, env, &writer)); + core::ScopedUnref deleter(writer); + + TF_CHECK_OK(writer_fn(writer)); + TF_CHECK_OK(writer->Flush()); + + std::vector files; + TF_CHECK_OK(env->GetChildren(testing::TmpDir(), &files)); + bool found = false; + for (const string& f : files) { + if (StringPiece(f).contains(test_name)) { + if (found) { + return errors::Unknown("Found more than one file for ", test_name); + } + found = true; + std::unique_ptr read_file; + TF_CHECK_OK(env->NewRandomAccessFile(io::JoinPath(testing::TmpDir(), f), + &read_file)); + io::RecordReader reader(read_file.get(), io::RecordReaderOptions()); + string record; + uint64 offset = 0; + TF_CHECK_OK(reader.ReadRecord(&offset, + &record)); // The first event is irrelevant + TF_CHECK_OK(reader.ReadRecord(&offset, &record)); + Event e; + e.ParseFromString(record); + test_fn(e); + } + } + if (!found) { + return errors::Unknown("Found no file for ", test_name); + } + return Status::OK(); +} + +TEST(SummaryInterfaceTest, WriteTensor) { + TF_CHECK_OK(SummaryTestHelper("tensor_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_FLOAT, TensorShape({})); + one.scalar()() = 1.0; + TF_RETURN_IF_ERROR(writer->WriteTensor( + 2, one, "name", + SummaryMetadata().SerializeAsString())); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name"); + })); +} + +TEST(SummaryInterfaceTest, WriteScalar) { + TF_CHECK_OK(SummaryTestHelper( + "scalar_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_FLOAT, TensorShape({})); + one.scalar()() = 1.0; + TF_RETURN_IF_ERROR(writer->WriteScalar(2, one, "name")); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name"); + EXPECT_EQ(e.summary().value(0).simple_value(), 1.0); + })); +} + +TEST(SummaryInterfaceTest, WriteHistogram) { + TF_CHECK_OK(SummaryTestHelper("hist_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_FLOAT, TensorShape({})); + one.scalar()() = 1.0; + TF_RETURN_IF_ERROR( + writer->WriteHistogram(2, one, "name")); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name"); + EXPECT_TRUE(e.summary().value(0).has_histo()); + })); +} + +TEST(SummaryInterfaceTest, WriteImage) { + TF_CHECK_OK(SummaryTestHelper( + "image_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_UINT8, TensorShape({1, 1, 1, 1})); + one.scalar()() = 1; + TF_RETURN_IF_ERROR(writer->WriteImage(2, one, "name", 1, Tensor())); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name/image"); + CHECK(e.summary().value(0).has_image()); + EXPECT_EQ(e.summary().value(0).image().height(), 1); + EXPECT_EQ(e.summary().value(0).image().width(), 1); + EXPECT_EQ(e.summary().value(0).image().colorspace(), 1); + })); +} + +TEST(SummaryInterfaceTest, WriteAudio) { + TF_CHECK_OK(SummaryTestHelper( + "scalar_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_FLOAT, TensorShape({1, 1})); + one.scalar()() = 1.0; + TF_RETURN_IF_ERROR(writer->WriteAudio(2, one, "name", 1, 1)); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name/audio"); + CHECK(e.summary().value(0).has_audio()); + })); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc new file mode 100644 index 0000000000..d0eca0f1e7 --- /dev/null +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -0,0 +1,226 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/kernels/summary_interface.h" + +namespace tensorflow { + +REGISTER_KERNEL_BUILDER(Name("SummaryWriter").Device(DEVICE_CPU), + ResourceHandleOp); + +class CreateSummaryFileWriterOp : public OpKernel { + public: + explicit CreateSummaryFileWriterOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("logdir", &tmp)); + const string logdir = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("max_queue", &tmp)); + const int32 max_queue = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("flush_millis", &tmp)); + const int32 flush_millis = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("filename_suffix", &tmp)); + const string filename_suffix = tmp->scalar()(); + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, CreateSummaryWriter(max_queue, flush_millis, logdir, + filename_suffix, ctx->env(), &s)); + Status status = CreateResource(ctx, HandleFromInput(ctx, 0), s); + if (!status.ok()) { + s->Unref(); + ctx->SetStatus(status); + return; + } + } +}; +REGISTER_KERNEL_BUILDER(Name("CreateSummaryFileWriter").Device(DEVICE_CPU), + CreateSummaryFileWriterOp); + +class FlushSummaryWriterOp : public OpKernel { + public: + explicit FlushSummaryWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + OP_REQUIRES_OK(ctx, s->Flush()); + } +}; +REGISTER_KERNEL_BUILDER(Name("FlushSummaryWriter").Device(DEVICE_CPU), + FlushSummaryWriterOp); + +class CloseSummaryWriterOp : public OpKernel { + public: + explicit CloseSummaryWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + OP_REQUIRES_OK(ctx, DeleteResource( + ctx, HandleFromInput(ctx, 0))); + } +}; +REGISTER_KERNEL_BUILDER(Name("CloseSummaryWriter").Device(DEVICE_CPU), + CloseSummaryWriterOp); + +class WriteSummaryOp : public OpKernel { + public: + explicit WriteSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("summary_metadata", &tmp)); + const string& serialized_metadata = tmp->scalar()(); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); + + OP_REQUIRES_OK(ctx, + s->WriteTensor(global_step, *t, tag, serialized_metadata)); + } +}; +REGISTER_KERNEL_BUILDER(Name("WriteSummary").Device(DEVICE_CPU), + WriteSummaryOp); + +class WriteScalarSummaryOp : public OpKernel { + public: + explicit WriteScalarSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("value", &t)); + + OP_REQUIRES_OK(ctx, s->WriteScalar(global_step, *t, tag)); + } +}; +REGISTER_KERNEL_BUILDER(Name("WriteScalarSummary").Device(DEVICE_CPU), + WriteScalarSummaryOp); + +class WriteHistogramSummaryOp : public OpKernel { + public: + explicit WriteHistogramSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("values", &t)); + + OP_REQUIRES_OK(ctx, s->WriteHistogram(global_step, *t, tag)); + } +}; +REGISTER_KERNEL_BUILDER(Name("WriteHistogramSummary").Device(DEVICE_CPU), + WriteHistogramSummaryOp); + +class WriteImageSummaryOp : public OpKernel { + public: + explicit WriteImageSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + int64 max_images_tmp; + OP_REQUIRES_OK(ctx, ctx->GetAttr("max_images", &max_images_tmp)); + OP_REQUIRES(ctx, max_images_tmp < (1LL << 31), + errors::InvalidArgument("max_images must be < 2^31")); + max_images_ = static_cast(max_images_tmp); + } + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + const Tensor* bad_color; + OP_REQUIRES_OK(ctx, ctx->input("bad_color", &bad_color)); + OP_REQUIRES( + ctx, TensorShapeUtils::IsVector(bad_color->shape()), + errors::InvalidArgument("bad_color must be a vector, got shape ", + bad_color->shape().DebugString())); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); + + OP_REQUIRES_OK( + ctx, s->WriteImage(global_step, *t, tag, max_images_, *bad_color)); + } + + private: + int32 max_images_; +}; +REGISTER_KERNEL_BUILDER(Name("WriteImageSummary").Device(DEVICE_CPU), + WriteImageSummaryOp); + +class WriteAudioSummaryOp : public OpKernel { + public: + explicit WriteAudioSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("max_outputs", &max_outputs_)); + OP_REQUIRES(ctx, max_outputs_ > 0, + errors::InvalidArgument("max_outputs must be > 0")); + } + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("sample_rate", &tmp)); + const float sample_rate = tmp->scalar()(); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); + + OP_REQUIRES_OK( + ctx, s->WriteAudio(global_step, *t, tag, max_outputs_, sample_rate)); + } + + private: + int max_outputs_; + bool has_sample_rate_attr_; + float sample_rate_attr_; +}; +REGISTER_KERNEL_BUILDER(Name("WriteAudioSummary").Device(DEVICE_CPU), + WriteAudioSummaryOp); + +} // namespace tensorflow diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc new file mode 100644 index 0000000000..f778b48797 --- /dev/null +++ b/tensorflow/core/ops/summary_ops.cc @@ -0,0 +1,218 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); + +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("SummaryWriter") + .Output("writer: resource") + .Attr("shared_name: string = ''") + .Attr("container: string = ''") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Returns a handle to be used to access a summary writer. + +The summary writer is an in-graph resource which can be used by ops to write +summaries to event files. + +writer: the summary writer resource. Scalar handle. +)doc"); + +REGISTER_OP("CreateSummaryFileWriter") + .Input("writer: resource") + .Input("logdir: string") + .Input("max_queue: int32") + .Input("flush_millis: int32") + .Input("filename_suffix: string") + .Doc(R"doc( +Creates a summary file writer accessible by the given resource handle. + +writer: A handle to the summary writer resource +logdir: Directory where the event file will be written. +max_queue: Size of the queue of pending events and summaries. +flush_millis: How often, in milliseconds, to flush the pending events and + summaries to disk. +filename_suffix: Every event file's name is suffixed with this suffix. +)doc"); + +REGISTER_OP("FlushSummaryWriter") + .Input("writer: resource") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"( +Flushes the writer's unwritten events. + +writer: A handle to the summary writer resource. +)"); + +REGISTER_OP("CloseSummaryWriter") + .Input("writer: resource") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"( +Flushes and closes the summary writer. + +Also removes it from the resource manager. To reopen, use another +CreateSummaryFileWriter op. + +writer: A handle to the summary writer resource. +)"); + +REGISTER_OP("WriteSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tensor: T") + .Input("tag: string") + .Input("summary_metadata: string") + .Attr("T: type") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Outputs a `Summary` protocol buffer with a tensor. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tensor: A tensor to serialize. +tag: The summary's tag. +summary_metadata: Serialized SummaryMetadata protocol buffer containing + plugin-related metadata for this summary. +)doc"); + +REGISTER_OP("WriteScalarSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tag: string") + .Input("value: T") + .Attr("T: realnumbertype") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Writes a `Summary` protocol buffer with scalar values. + +The input `tag` and `value` must have the scalars. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tag: Tag for the summary. +value: Value for the summary. +)doc"); + +REGISTER_OP("WriteHistogramSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tag: string") + .Input("values: T") + .Attr("T: realnumbertype = DT_FLOAT") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Writes a `Summary` protocol buffer with a histogram. + +The generated +[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +has one summary value containing a histogram for `values`. + +This op reports an `InvalidArgument` error if any value is not finite. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tag: Scalar. Tag to use for the `Summary.Value`. +values: Any shape. Values to use to build the histogram. +)doc"); + +REGISTER_OP("WriteImageSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tag: string") + .Input("tensor: T") + .Input("bad_color: uint8") + .Attr("max_images: int >= 1 = 3") + .Attr("T: {uint8, float, half} = DT_FLOAT") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Writes a `Summary` protocol buffer with images. + +The summary has up to `max_images` summary values containing images. The +images are built from `tensor` which must be 4-D with shape `[batch_size, +height, width, channels]` and where `channels` can be: + +* 1: `tensor` is interpreted as Grayscale. +* 3: `tensor` is interpreted as RGB. +* 4: `tensor` is interpreted as RGBA. + +The images have the same number of channels as the input tensor. For float +input, the values are normalized one image at a time to fit in the range +`[0, 255]`. `uint8` values are unchanged. The op uses two different +normalization algorithms: + +* If the input values are all positive, they are rescaled so the largest one + is 255. + +* If any input value is negative, the values are shifted so input value 0.0 + is at 127. They are then rescaled so that either the smallest value is 0, + or the largest one is 255. + +The `tag` argument is a scalar `Tensor` of type `string`. It is used to +build the `tag` of the summary values: + +* If `max_images` is 1, the summary value tag is '*tag*/image'. +* If `max_images` is greater than 1, the summary value tags are + generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. + +The `bad_color` argument is the color to use in the generated images for +non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +Each element must be in the range `[0, 255]` (It represents the value of a +pixel in the output image). Non-finite values in the input tensor are +replaced by this tensor in the output image. The default value is the color +red. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tag: Scalar. Used to build the `tag` attribute of the summary values. +tensor: 4-D of shape `[batch_size, height, width, channels]` where + `channels` is 1, 3, or 4. +max_images: Max number of batch elements to generate images for. +bad_color: Color to use for pixels with non-finite values. +)doc"); + +REGISTER_OP("WriteAudioSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tag: string") + .Input("tensor: float") + .Input("sample_rate: float") + .Attr("max_outputs: int >= 1 = 3") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Writes a `Summary` protocol buffer with audio. + +The summary has up to `max_outputs` summary values containing audio. The +audio is built from `tensor` which must be 3-D with shape `[batch_size, +frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. + +The `tag` argument is a scalar `Tensor` of type `string`. It is used to +build the `tag` of the summary values: + +* If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +* If `max_outputs` is greater than 1, the summary value tags are + generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tag: Scalar. Used to build the `tag` attribute of the summary values. +tensor: 2-D of shape `[batch_size, frames]`. +sample_rate: The sample rate of the signal in hertz. +max_outputs: Max number of batch elements to generate audio for. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 27ffdd9810..a5a93b7bbe 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -171,16 +171,6 @@ class Context(object): """Sets summary writer resource.""" self._summary_writer_resource = resource - @property - def recording_summaries(self): - """Returns True if recording summaries is enabled in current thread..""" - return self._eager_context.recording_summaries - - @recording_summaries.setter - def recording_summaries(self, val): - """Enables recording summaries is enabled in current thread..""" - self._eager_context.recording_summaries = val - @property def device_name(self): """Returns the device name for the current thread.""" @@ -360,24 +350,6 @@ def device(name): return context().device(name) -@contextlib.contextmanager -def record_summaries(): - """Context-manager to enable recording of summaries.""" - ctx = context() - old = ctx.recording_summaries - ctx.recording_summaries = True - try: - yield - finally: - ctx.recording_summaries = old - - -def should_record_summary(): - """True if a summary should be recorded now.""" - c = context() - return c.recording_summaries and c.summary_writer_resource is not None - - def run(main=None, argv=None): """Runs the program with an optional 'main' function and 'argv' list. diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 7ae80aa156..5de396f62c 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -55,10 +55,6 @@ class TFETest(test_util.TensorFlowTestCase): ctx.summary_writer_resource = 'mock' self.assertEqual('mock', ctx.summary_writer_resource) - self.assertFalse(ctx.recording_summaries) - ctx.recording_summaries = True - self.assertTrue(ctx.recording_summaries) - self.assertEqual('', ctx.device_name) self.assertEqual(ctx.device_name, ctx.device_spec.to_string()) with ctx.device('GPU:0'): @@ -95,8 +91,7 @@ class TFETest(test_util.TensorFlowTestCase): return [ ctx.in_graph_mode(), ctx.in_eager_mode(), ctx.scope_name, ctx.summary_writer_resource, - ctx.recording_summaries, ctx.device_name, - ctx.num_gpus() + ctx.device_name, ctx.num_gpus() ] def get_values(ctx, values): -- GitLab From c54cf14ce2c8c45cb3d8dc2cdd51b068c7f09e55 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 16:59:50 -0700 Subject: [PATCH 182/294] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 167342818 --- tensorflow/go/op/wrappers.go | 316 +++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 9583ffb38b..dda707aea2 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -209,6 +209,95 @@ func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...Va return op.Output(0) } +// Writes a `Summary` protocol buffer with scalar values. +// +// The input `tag` and `value` must have the scalars. +// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tag: Tag for the summary. +// value: Value for the summary. +// +// Returns the created operation. +func WriteScalarSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WriteScalarSummary", + Input: []tf.Input{ + writer, global_step, tag, value, + }, + } + return scope.AddOperation(opspec) +} + +// Outputs a `Summary` protocol buffer with a tensor. +// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tensor: A tensor to serialize. +// tag: The summary's tag. +// summary_metadata: Serialized SummaryMetadata protocol buffer containing +// plugin-related metadata for this summary. +// +// Returns the created operation. +func WriteSummary(scope *Scope, writer tf.Output, global_step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WriteSummary", + Input: []tf.Input{ + writer, global_step, tensor, tag, summary_metadata, + }, + } + return scope.AddOperation(opspec) +} + +// Flushes and closes the summary writer. +// +// Also removes it from the resource manager. To reopen, use another +// CreateSummaryFileWriter op. +// +// Arguments: +// writer: A handle to the summary writer resource. +// +// Returns the created operation. +func CloseSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "CloseSummaryWriter", + Input: []tf.Input{ + writer, + }, + } + return scope.AddOperation(opspec) +} + +// Flushes the writer's unwritten events. +// +// Arguments: +// writer: A handle to the summary writer resource. +// +// Returns the created operation. +func FlushSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "FlushSummaryWriter", + Input: []tf.Input{ + writer, + }, + } + return scope.AddOperation(opspec) +} + // FakeQuantWithMinMaxVarsPerChannelGradientAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannelGradient. type FakeQuantWithMinMaxVarsPerChannelGradientAttr func(optionalAttr) @@ -2149,6 +2238,34 @@ func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset return offset } +// Writes a `Summary` protocol buffer with a histogram. +// +// The generated +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// has one summary value containing a histogram for `values`. +// +// This op reports an `InvalidArgument` error if any value is not finite. +// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tag: Scalar. Tag to use for the `Summary.Value`. +// values: Any shape. Values to use to build the histogram. +// +// Returns the created operation. +func WriteHistogramSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WriteHistogramSummary", + Input: []tf.Input{ + writer, global_step, tag, values, + }, + } + return scope.AddOperation(opspec) +} + // Concatenates tensors along one dimension. // // Arguments: @@ -7087,6 +7204,48 @@ func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optio return op.Output(0) } +// SummaryWriterAttr is an optional argument to SummaryWriter. +type SummaryWriterAttr func(optionalAttr) + +// SummaryWriterSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func SummaryWriterSharedName(value string) SummaryWriterAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// SummaryWriterContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func SummaryWriterContainer(value string) SummaryWriterAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// Returns a handle to be used to access a summary writer. +// +// The summary writer is an in-graph resource which can be used by ops to write +// summaries to event files. +// +// Returns the summary writer resource. Scalar handle. +func SummaryWriter(scope *Scope, optional ...SummaryWriterAttr) (writer tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SummaryWriter", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Returns the set of files matching one or more glob patterns. // // Note that this routine only supports wildcard characters in the @@ -10570,6 +10729,61 @@ func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf. return op.Output(0) } +// WriteAudioSummaryAttr is an optional argument to WriteAudioSummary. +type WriteAudioSummaryAttr func(optionalAttr) + +// WriteAudioSummaryMaxOutputs sets the optional max_outputs attribute to value. +// +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func WriteAudioSummaryMaxOutputs(value int64) WriteAudioSummaryAttr { + return func(m optionalAttr) { + m["max_outputs"] = value + } +} + +// Writes a `Summary` protocol buffer with audio. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. +// +// Returns the created operation. +func WriteAudioSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "WriteAudioSummary", + Input: []tf.Input{ + writer, global_step, tag, tensor, sample_rate, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. type FusedResizeAndPadConv2DAttr func(optionalAttr) @@ -15797,6 +16011,30 @@ func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64 return op.Output(0) } +// Creates a summary file writer accessible by the given resource handle. +// +// Arguments: +// writer: A handle to the summary writer resource +// logdir: Directory where the event file will be written. +// max_queue: Size of the queue of pending events and summaries. +// flush_millis: How often, in milliseconds, to flush the pending events and +// summaries to disk. +// filename_suffix: Every event file's name is suffixed with this suffix. +// +// Returns the created operation. +func CreateSummaryFileWriter(scope *Scope, writer tf.Output, logdir tf.Output, max_queue tf.Output, flush_millis tf.Output, filename_suffix tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "CreateSummaryFileWriter", + Input: []tf.Input{ + writer, logdir, max_queue, flush_millis, filename_suffix, + }, + } + return scope.AddOperation(opspec) +} + // EncodeBase64Attr is an optional argument to EncodeBase64. type EncodeBase64Attr func(optionalAttr) @@ -17172,6 +17410,84 @@ func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) ( return op.Output(0) } +// WriteImageSummaryAttr is an optional argument to WriteImageSummary. +type WriteImageSummaryAttr func(optionalAttr) + +// WriteImageSummaryMaxImages sets the optional max_images attribute to value. +// +// value: Max number of batch elements to generate images for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr { + return func(m optionalAttr) { + m["max_images"] = value + } +} + +// Writes a `Summary` protocol buffer with images. +// +// The summary has up to `max_images` summary values containing images. The +// images are built from `tensor` which must be 4-D with shape `[batch_size, +// height, width, channels]` and where `channels` can be: +// +// * 1: `tensor` is interpreted as Grayscale. +// * 3: `tensor` is interpreted as RGB. +// * 4: `tensor` is interpreted as RGBA. +// +// The images have the same number of channels as the input tensor. For float +// input, the values are normalized one image at a time to fit in the range +// `[0, 255]`. `uint8` values are unchanged. The op uses two different +// normalization algorithms: +// +// * If the input values are all positive, they are rescaled so the largest one +// is 255. +// +// * If any input value is negative, the values are shifted so input value 0.0 +// is at 127. They are then rescaled so that either the smallest value is 0, +// or the largest one is 255. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_images` is 1, the summary value tag is '*tag*/image'. +// * If `max_images` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. +// +// The `bad_color` argument is the color to use in the generated images for +// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +// Each element must be in the range `[0, 255]` (It represents the value of a +// pixel in the output image). Non-finite values in the input tensor are +// replaced by this tensor in the output image. The default value is the color +// red. +// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 4-D of shape `[batch_size, height, width, channels]` where +// `channels` is 1, 3, or 4. +// bad_color: Color to use for pixels with non-finite values. +// +// Returns the created operation. +func WriteImageSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "WriteImageSummary", + Input: []tf.Input{ + writer, global_step, tag, tensor, bad_color, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // Pads a tensor with zeros. // // This operation pads a `input` with zeros according to the `paddings` you -- GitLab From 88063cdfa12e24df56dacf3fd7ff1084a031489c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 17:10:45 -0700 Subject: [PATCH 183/294] Automated g4 rollback of changelist 167320150 PiperOrigin-RevId: 167344016 --- tensorflow/core/framework/bfloat16_test.cc | 3 +-- tensorflow/core/framework/numeric_types.h | 9 +-------- tensorflow/core/kernels/cast_op.h | 9 ++++++++- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index af4e6a4411..5bd95b806f 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -23,8 +23,7 @@ namespace { TEST(Bfloat16Test, Simple) { bfloat16 a(12); - // Floating point representation of 12: 0x41400000 - EXPECT_EQ(0x4140, a.value); + EXPECT_EQ(12, a.value); } TEST(Bfloat16Test, Conversion) { diff --git a/tensorflow/core/framework/numeric_types.h b/tensorflow/core/framework/numeric_types.h index a630bee38d..31b88707e2 100644 --- a/tensorflow/core/framework/numeric_types.h +++ b/tensorflow/core/framework/numeric_types.h @@ -44,14 +44,7 @@ typedef Eigen::QUInt16 quint16; // see framework/bfloat16.h for description. struct bfloat16 { EIGEN_DEVICE_FUNC bfloat16() {} - EIGEN_DEVICE_FUNC explicit bfloat16(const float v) { - const uint16_t* p = reinterpret_cast(&v); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - value = p[0]; -#else - value = p[1]; -#endif - } + EIGEN_DEVICE_FUNC explicit bfloat16(const uint16_t v) : value(v) {} uint16_t value; }; diff --git a/tensorflow/core/kernels/cast_op.h b/tensorflow/core/kernels/cast_op.h index 59a991b5a8..5c24f164a4 100644 --- a/tensorflow/core/kernels/cast_op.h +++ b/tensorflow/core/kernels/cast_op.h @@ -121,7 +121,14 @@ struct scalar_cast_op { typedef ::tensorflow::bfloat16 result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ::tensorflow::bfloat16 operator()( const float a) const { - return ::tensorflow::bfloat16(a); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + const uint16_t* p = reinterpret_cast(&a); + return ::tensorflow::bfloat16(p[0]); +#else + static_assert(::tensorflow::port::kLittleEndian, "Not a little endian system!"); + const uint16_t* p = reinterpret_cast(&a); + return ::tensorflow::bfloat16(p[1]); +#endif } }; -- GitLab From 1499e91856d0d7a91ad6a6128b6d45f3d81e09d0 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 1 Sep 2017 17:11:07 -0700 Subject: [PATCH 184/294] Typo PiperOrigin-RevId: 167344049 --- tensorflow/core/kernels/summary_interface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/summary_interface.h b/tensorflow/core/kernels/summary_interface.h index b0861d218e..ae2fbb70fe 100644 --- a/tensorflow/core/kernels/summary_interface.h +++ b/tensorflow/core/kernels/summary_interface.h @@ -48,7 +48,7 @@ class SummaryWriterInterface : public ResourceBase { // Creates a SummaryWriterInterface instance which writes to a file. It will // enqueue up to max_queue summaries, and flush at least every flush_millis // milliseconds. The summaries will be written to the directory specified by -// logdir and with the filename suffixed by filename_suffix. The caller ows a +// logdir and with the filename suffixed by filename_suffix. The caller owns a // reference to result if the returned status is ok. Status CreateSummaryWriter(int max_queue, int flush_millis, const string& logdir, const string& filename_suffix, -- GitLab From 2935132bce62cbc785da68997248e449d63f5ff2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 21:48:07 -0700 Subject: [PATCH 185/294] Use the shared locks when reading the model. PiperOrigin-RevId: 167358155 --- .../boosted_trees/kernels/model_ops.cc | 4 +- .../boosted_trees/kernels/prediction_ops.cc | 4 +- .../boosted_trees/kernels/training_ops.cc | 5 +- .../python/training/functions/gbdt_batch.py | 120 +++++++++--------- 4 files changed, 70 insertions(+), 63 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc index 42112c586a..f4ad99f779 100644 --- a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc @@ -74,7 +74,7 @@ class TreeEnsembleStampTokenOp : public OpKernel { decision_tree_ensemble_resource; OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 0), &decision_tree_ensemble_resource)); - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); core::ScopedUnref unref_me(decision_tree_ensemble_resource); Tensor* output_stamp_token_t = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape(), @@ -95,7 +95,7 @@ class TreeEnsembleSerializeOp : public OpKernel { decision_tree_ensemble_resource; OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 0), &decision_tree_ensemble_resource)); - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); core::ScopedUnref unref_me(decision_tree_ensemble_resource); Tensor* output_stamp_token_t = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape(), diff --git a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc index daca049548..8ffd7f120b 100644 --- a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc @@ -143,7 +143,7 @@ class GradientTreesPredictionOp : public OpKernel { // Release the reference to the resource once we're done using it. core::ScopedUnref unref_me(decision_tree_ensemble_resource); if (use_locking_) { - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); DoCompute(context, decision_tree_ensemble_resource); } else { DoCompute(context, decision_tree_ensemble_resource); @@ -334,7 +334,7 @@ class GradientTreesPartitionExamplesOp : public OpKernel { // Release the reference to the resource once we're done using it. core::ScopedUnref unref_me(decision_tree_ensemble_resource); if (use_locking_) { - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); DoCompute(context, decision_tree_ensemble_resource); } else { DoCompute(context, decision_tree_ensemble_resource); diff --git a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc index 9e9ef1738c..d528757cf9 100644 --- a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc @@ -656,7 +656,8 @@ class GrowTreeEnsembleOp : public OpKernel { CHECK(split->split_info.split_node().node_case() != TreeNode::NODE_NOT_SET); CHECK(tree_config->nodes(node_id).node_case() == TreeNode::kLeaf) << "Unexpected node type to split " - << tree_config->nodes(node_id).node_case(); + << tree_config->nodes(node_id).node_case() << " for node_id " << node_id + << ". Tree config: " << tree_config->DebugString(); // Add left leaf. int32 left_id = tree_config->nodes_size(); @@ -767,7 +768,7 @@ class TreeEnsembleStatsOp : public OpKernel { OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 0), &decision_tree_ensemble_resource)); core::ScopedUnref unref_me(decision_tree_ensemble_resource); - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); // Get the stamp token. const Tensor* stamp_token_t; diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index 83c88a0442..2d28e0a9f1 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -407,75 +407,81 @@ class GradientBoostedDecisionTreeModel(object): local_stamp), _refresh_local_ensemble_fn, lambda: (control_flow_ops.no_op(), ensemble_stamp)) - # Once updated, Use the the local model for prediction. + # Once updated, use the local model for prediction. with ops.control_dependencies([refresh_local_ensemble]): ensemble_stats = training_ops.tree_ensemble_stats( local_ensemble_handle, ensemble_stamp) - apply_dropout, seed = _dropout_params(mode, ensemble_stats) # We don't need dropout info - we can always restore it based on the # seed. - predictions, predictions_no_dropout, _ = ( - prediction_ops.gradient_trees_prediction( - local_ensemble_handle, - seed, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - learner_config=self._learner_config_serialized, - apply_dropout=apply_dropout, - apply_averaging=apply_averaging, - use_locking=False, - center_bias=self._center_bias, - reduce_dim=self._reduce_dim)) - partition_ids = prediction_ops.gradient_trees_partition_examples( - local_ensemble_handle, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - use_locking=False) + apply_dropout, seed = _dropout_params(mode, ensemble_stats) + # Make sure ensemble stats run. This will check that the ensemble has + # the right stamp. + with ops.control_dependencies(ensemble_stats): + predictions, predictions_no_dropout, _ = ( + prediction_ops.gradient_trees_prediction( + local_ensemble_handle, + seed, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + learner_config=self._learner_config_serialized, + apply_dropout=apply_dropout, + apply_averaging=apply_averaging, + use_locking=True, + center_bias=self._center_bias, + reduce_dim=self._reduce_dim)) + partition_ids = prediction_ops.gradient_trees_partition_examples( + local_ensemble_handle, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + use_locking=True) else: with ops.device(self._ensemble_handle.device): ensemble_stats = training_ops.tree_ensemble_stats( self._ensemble_handle, ensemble_stamp) - apply_dropout, seed = _dropout_params(mode, ensemble_stats) # We don't need dropout info - we can always restore it based on the # seed. - predictions, predictions_no_dropout, _ = ( - prediction_ops.gradient_trees_prediction( - self._ensemble_handle, - seed, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - learner_config=self._learner_config_serialized, - apply_dropout=apply_dropout, - apply_averaging=apply_averaging, - use_locking=False, - center_bias=self._center_bias, - reduce_dim=self._reduce_dim)) - partition_ids = prediction_ops.gradient_trees_partition_examples( - self._ensemble_handle, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - use_locking=False) + apply_dropout, seed = _dropout_params(mode, ensemble_stats) + # Make sure ensemble stats run. This will check that the ensemble has + # the right stamp. + with ops.control_dependencies(ensemble_stats): + predictions, predictions_no_dropout, _ = ( + prediction_ops.gradient_trees_prediction( + self._ensemble_handle, + seed, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + learner_config=self._learner_config_serialized, + apply_dropout=apply_dropout, + apply_averaging=apply_averaging, + use_locking=True, + center_bias=self._center_bias, + reduce_dim=self._reduce_dim)) + partition_ids = prediction_ops.gradient_trees_partition_examples( + self._ensemble_handle, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + use_locking=True) return _make_predictions_dict(ensemble_stamp, predictions, predictions_no_dropout, partition_ids, -- GitLab From 7d5cbd78a54319eeb45bca2e239ec037997dad20 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Sep 2017 22:16:25 -0700 Subject: [PATCH 186/294] Update feature_util to support SequenceExample proto. PiperOrigin-RevId: 167359339 --- tensorflow/core/example/feature_util.cc | 124 +++++++--- tensorflow/core/example/feature_util.h | 228 +++++++++++++------ tensorflow/core/example/feature_util_test.cc | 228 ++++++++++++++++++- 3 files changed, 470 insertions(+), 110 deletions(-) diff --git a/tensorflow/core/example/feature_util.cc b/tensorflow/core/example/feature_util.cc index 6f3cc6c6c5..f0593ede82 100644 --- a/tensorflow/core/example/feature_util.cc +++ b/tensorflow/core/example/feature_util.cc @@ -18,77 +18,129 @@ limitations under the License. namespace tensorflow { namespace internal { - -::tensorflow::Feature& ExampleFeature(const string& name, - ::tensorflow::Example* example) { - ::tensorflow::Features* features = example->mutable_features(); - return (*features->mutable_feature())[name]; +Feature& ExampleFeature(const string& name, Example* example) { + return *GetFeature(name, example); } -} // namespace internal +} // namespace internal template <> -bool ExampleHasFeature(const string& name, - const Example& example) { - auto it = example.features().feature().find(name); - return (it != example.features().feature().end()) && +bool HasFeature<>(const string& key, const Features& features) { + return (features.feature().find(key) != features.feature().end()); +} + +template <> +bool HasFeature(const string& key, const Features& features) { + auto it = features.feature().find(key); + return (it != features.feature().end()) && (it->second.kind_case() == Feature::KindCase::kInt64List); } template <> -bool ExampleHasFeature(const string& name, const Example& example) { - auto it = example.features().feature().find(name); - return (it != example.features().feature().end()) && +bool HasFeature(const string& key, const Features& features) { + auto it = features.feature().find(key); + return (it != features.feature().end()) && (it->second.kind_case() == Feature::KindCase::kFloatList); } template <> -bool ExampleHasFeature(const string& name, const Example& example) { - auto it = example.features().feature().find(name); - return (it != example.features().feature().end()) && +bool HasFeature(const string& key, const Features& features) { + auto it = features.feature().find(key); + return (it != features.feature().end()) && (it->second.kind_case() == Feature::KindCase::kBytesList); } +bool HasFeatureList(const string& key, + const SequenceExample& sequence_example) { + auto& feature_list = sequence_example.feature_lists().feature_list(); + return (feature_list.find(key) != feature_list.end()); +} + template <> const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example) { - return example.features().feature().at(name).int64_list().value(); + const Feature& feature) { + return feature.int64_list().value(); } template <> protobuf::RepeatedField* GetFeatureValues( - const string& name, Example* example) { - return internal::ExampleFeature(name, example) - .mutable_int64_list() - ->mutable_value(); + Feature* feature) { + return feature->mutable_int64_list()->mutable_value(); } template <> const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example) { - return example.features().feature().at(name).float_list().value(); + const Feature& feature) { + return feature.float_list().value(); } template <> -protobuf::RepeatedField* GetFeatureValues(const string& name, - Example* example) { - return internal::ExampleFeature(name, example) - .mutable_float_list() - ->mutable_value(); +protobuf::RepeatedField* GetFeatureValues(Feature* feature) { + return feature->mutable_float_list()->mutable_value(); } template <> const protobuf::RepeatedPtrField& GetFeatureValues( - const string& name, const Example& example) { - return example.features().feature().at(name).bytes_list().value(); + const Feature& feature) { + return feature.bytes_list().value(); +} + +template <> +protobuf::RepeatedPtrField* GetFeatureValues(Feature* feature) { + return feature->mutable_bytes_list()->mutable_value(); +} + +const protobuf::RepeatedPtrField& GetFeatureList( + const string& key, const SequenceExample& sequence_example) { + return sequence_example.feature_lists().feature_list().at(key).feature(); +} + +protobuf::RepeatedPtrField* GetFeatureList( + const string& feature_list_key, SequenceExample* sequence_example) { + return (*sequence_example->mutable_feature_lists() + ->mutable_feature_list())[feature_list_key] + .mutable_feature(); +} + +template <> +Features* GetFeatures(Features* proto) { + return proto; +} + +template <> +Features* GetFeatures(Example* proto) { + return proto->mutable_features(); } template <> -protobuf::RepeatedPtrField* GetFeatureValues(const string& name, - Example* example) { - return internal::ExampleFeature(name, example) - .mutable_bytes_list() - ->mutable_value(); +const Features& GetFeatures(const Features& proto) { + return proto; } +template <> +const Features& GetFeatures(const Example& proto) { + return proto.features(); +} + +template <> +const protobuf::RepeatedField& GetFeatureValues( + const Feature& feature); + +template <> +protobuf::RepeatedField* GetFeatureValues( + Feature* feature); + +template <> +const protobuf::RepeatedField& GetFeatureValues( + const Feature& feature); + +template <> +protobuf::RepeatedField* GetFeatureValues(Feature* feature); + +template <> +const protobuf::RepeatedPtrField& GetFeatureValues( + const Feature& feature); + +template <> +protobuf::RepeatedPtrField* GetFeatureValues(Feature* feature); } // namespace tensorflow diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h index 4004411cb1..a87c2c9a57 100644 --- a/tensorflow/core/example/feature_util.h +++ b/tensorflow/core/example/feature_util.h @@ -13,9 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// A set of lightweight wrappers which simplify access to Example features. +// A set of lightweight wrappers which simplify access to Feature protos. // // TensorFlow Example proto uses associative maps on top of oneof fields. +// SequenceExample proto uses associative map of FeatureList. // So accessing feature values is not very convenient. // // For example, to read a first value of integer feature "tag": @@ -42,9 +43,59 @@ limitations under the License. // (RepeatedPtrField for byte list). So refer to its documentation of // RepeatedField for full list of supported methods. // -// NOTE: It is also important to mention that due to the nature of oneof proto -// fields setting a feature of one type automatically clears all values stored -// as another type with the same feature name. +// NOTE: Due to the nature of oneof proto fields setting a feature of one type +// automatically clears all values stored as another type with the same feature +// key. +// +// This library also has tools to work with SequenceExample protos. +// +// To get a value from SequenceExample.context: +// int id = GetFeatureValues("tag", se.context()).Get(0); +// To add a value to the context: +// GetFeatureValues("tag", se.mutable_context())->Add(42); +// +// To add values to feature_lists: +// AppendFeatureValues({4.0}, +// GetFeatureList("movie_ratings", &se)->Add()); +// AppendFeatureValues({5.0, 3.0}, +// GetFeatureList("movie_ratings", &se)->Add()); +// This will create a feature list keyed as "images" with two features: +// feature_lists { +// feature_list { +// key: "images" +// value { +// feature { float_list { value: [4.0] } } +// feature { float_list { value: [5.0, 3.0] } } +// } +// } } +// +// Functions exposed by this library: +// HasFeature<[FeatureType]>(key, proto) -> bool +// Returns true if a feature with the specified key, and optionally +// FeatureType, belongs to the Features or Example proto. +// HasFeatureList(key, sequence_example) -> bool +// Returns true if SequenceExample has a feature_list with the key. +// GetFeatureValues(key, proto) -> RepeatedField +// Returns values for the specified key and the FeatureType. +// Supported types for the proto: Example, Features. +// GetFeatureList(key, sequence_example) -> RepeatedPtrField +// Returns Feature protos associated with a key. +// AppendFeatureValues(begin, end, feature) +// AppendFeatureValues(container or initializer_list, feature) +// Copies values into a Feature. +// AppendFeatureValues(begin, end, key, proto) +// AppendFeatureValues(container or initializer_list, key, proto) +// Copies values into Features and Example protos with the specified key. +// +// Auxiliary functions, it is unlikely you'll need to use them directly: +// GetFeatures(proto) -> Features +// A convenience function to get Features proto. +// Supported types for the proto: Example, Features. +// GetFeature(key, proto) -> Feature* +// Returns a Feature proto for the specified key, creates a new if +// necessary. Supported types for the proto: Example, Features. +// GetFeatureValues(feature) -> RepeatedField +// Returns values of the feature for the FeatureType. #ifndef TENSORFLOW_EXAMPLE_FEATURE_H_ #define TENSORFLOW_EXAMPLE_FEATURE_H_ @@ -62,10 +113,11 @@ namespace tensorflow { namespace internal { +// DEPRECATED: Use GetFeature instead. +// TODO(gorban): Update all clients in a followup CL. // Returns a reference to a feature corresponding to the name. // Note: it will create a new Feature if it is missing in the example. -::tensorflow::Feature& ExampleFeature(const string& name, - ::tensorflow::Example* example); +Feature& ExampleFeature(const string& name, Example* example); // Specializations of RepeatedFieldTrait define a type of RepeatedField // corresponding to a selected feature type. @@ -127,89 +179,135 @@ struct FeatureTrait< } // namespace internal -// Returns true if feature with the specified name belongs to the example proto. -// Doesn't check feature type. Note that specialized versions return false if -// the feature has a wrong type. -template -bool ExampleHasFeature(const string& name, const Example& example) { - return example.features().feature().find(name) != - example.features().feature().end(); -} +// Returns true if sequence_example has a feature_list with the specified key. +bool HasFeatureList(const string& key, const SequenceExample& sequence_example); + +// A family of template functions to return mutable Features proto from a +// container proto. Supported ProtoTypes: Example, Features. +template +Features* GetFeatures(ProtoType* proto); + +template +const Features& GetFeatures(const ProtoType& proto); // Base declaration of a family of template functions to return a read only -// repeated field corresponding to a feature with the specified name. +// repeated field of feature values. template const typename internal::RepeatedFieldTrait::Type& -GetFeatureValues(const string& name, const Example& example); +GetFeatureValues(const Feature& feature); + +// Returns a read only repeated field corresponding to a feature with the +// specified name and FeatureType. Supported ProtoTypes: Example, Features. +template +const typename internal::RepeatedFieldTrait::Type& +GetFeatureValues(const string& key, const ProtoType& proto) { + return GetFeatureValues(GetFeatures(proto).feature().at(key)); +} -// Base declaration of a family of template functions to return a mutable -// repeated field corresponding to a feature with the specified name. +// Returns a mutable repeated field of a feature values. template typename internal::RepeatedFieldTrait::Type* GetFeatureValues( - const string& name, Example* example); + Feature* feature); + +// Returns a mutable repeated field corresponding to a feature with the +// specified name and FeatureType. Supported ProtoTypes: Example, Features. +template +typename internal::RepeatedFieldTrait::Type* GetFeatureValues( + const string& key, ProtoType* proto) { + ::tensorflow::Feature& feature = + (*GetFeatures(proto)->mutable_feature())[key]; + return GetFeatureValues(&feature); +} + +// Returns a Feature proto for the specified key, creates a new if necessary. +// Supported types for the proto: Example, Features. +template +Feature* GetFeature(const string& key, ProtoType* proto) { + return &(*GetFeatures(proto)->mutable_feature())[key]; +} + +// Returns a repeated field with features corresponding to a feature_list key. +const protobuf::RepeatedPtrField& GetFeatureList( + const string& key, const SequenceExample& sequence_example); + +// Returns a mutable repeated field with features corresponding to a +// feature_list key. It will create a new FeatureList if necessary. +protobuf::RepeatedPtrField* GetFeatureList( + const string& feature_list_key, SequenceExample* sequence_example); -// Copies elements from the range, defined by [first, last) into a feature. template void AppendFeatureValues(IteratorType first, IteratorType last, - const string& name, Example* example) { + Feature* feature) { using FeatureType = typename internal::FeatureTrait< typename std::iterator_traits::value_type>::Type; - std::copy(first, last, protobuf::RepeatedFieldBackInserter( - GetFeatureValues(name, example))); + std::copy(first, last, + protobuf::RepeatedFieldBackInserter( + GetFeatureValues(feature))); +} + +template +void AppendFeatureValues(std::initializer_list container, + Feature* feature) { + AppendFeatureValues(container.begin(), container.end(), feature); } -// Copies all elements from the container into a feature. template -void AppendFeatureValues(const ContainerType& container, const string& name, - Example* example) { +void AppendFeatureValues(const ContainerType& container, Feature* feature) { using IteratorType = typename ContainerType::const_iterator; - AppendFeatureValues(container.begin(), container.end(), name, - example); + AppendFeatureValues(container.begin(), container.end(), + feature); } -// Copies all elements from the initializer list into a feature. -template +// Copies elements from the range, defined by [first, last) into the feature +// obtainable from the (proto, key) combination. +template +void AppendFeatureValues(IteratorType first, IteratorType last, + const string& key, ProtoType* proto) { + AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); +} + +// Copies all elements from the container into a feature. +template +void AppendFeatureValues(const ContainerType& container, const string& key, + ProtoType* proto) { + using IteratorType = typename ContainerType::const_iterator; + AppendFeatureValues(container.begin(), container.end(), key, + proto); +} + +// Copies all elements from the initializer list into a Feature contained by +// Features or Example proto. +template void AppendFeatureValues(std::initializer_list container, - const string& name, Example* example) { + const string& key, ProtoType* proto) { using IteratorType = typename std::initializer_list::const_iterator; - AppendFeatureValues(container.begin(), container.end(), name, - example); + AppendFeatureValues(container.begin(), container.end(), key, + proto); } -template <> -bool ExampleHasFeature(const string& name, - const Example& example); - -template <> -bool ExampleHasFeature(const string& name, const Example& example); - -template <> -bool ExampleHasFeature(const string& name, const Example& example); - -template <> -const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example); - -template <> -protobuf::RepeatedField* GetFeatureValues( - const string& name, Example* example); - -template <> -const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example); - -template <> -protobuf::RepeatedField* GetFeatureValues(const string& name, - Example* example); - -template <> -const protobuf::RepeatedPtrField& GetFeatureValues( - const string& name, const Example& example); +// Returns true if a feature with the specified key belongs to the Features. +// The template parameter pack accepts zero or one template argument - which +// is FeatureType. If the FeatureType not specified (zero template arguments) +// the function will not check the feature type. Otherwise it will return false +// if the feature has a wrong type. +template +bool HasFeature(const string& key, const Features& features); + +// Returns true if a feature with the specified key belongs to the Example. +// Doesn't check feature type if used without FeatureType, otherwise the +// specialized versions return false if the feature has a wrong type. +template +bool HasFeature(const string& key, const Example& example) { + return HasFeature(key, GetFeatures(example)); +}; -template <> -protobuf::RepeatedPtrField* GetFeatureValues(const string& name, - Example* example); +// DEPRECATED: use HasFeature instead. +// TODO(gorban): update all clients in a followup CL. +template +bool ExampleHasFeature(const string& key, const Example& example) { + return HasFeature(key, example); +} } // namespace tensorflow #endif // TENSORFLOW_EXAMPLE_FEATURE_H_ diff --git a/tensorflow/core/example/feature_util_test.cc b/tensorflow/core/example/feature_util_test.cc index eb7b90af1b..cd32dee306 100644 --- a/tensorflow/core/example/feature_util_test.cc +++ b/tensorflow/core/example/feature_util_test.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - #include "tensorflow/core/example/feature_util.h" #include @@ -38,6 +37,16 @@ TEST(GetFeatureValuesInt64Test, ReadsASingleValue) { EXPECT_EQ(42, tag.Get(0)); } +TEST(GetFeatureValuesInt64Test, ReadsASingleValueFromFeature) { + Feature feature; + feature.mutable_int64_list()->add_value(42); + + auto values = GetFeatureValues(feature); + + ASSERT_EQ(1, values.size()); + EXPECT_EQ(42, values.Get(0)); +} + TEST(GetFeatureValuesInt64Test, WritesASingleValue) { Example example; @@ -48,25 +57,33 @@ TEST(GetFeatureValuesInt64Test, WritesASingleValue) { EXPECT_EQ(42, example.features().feature().at("tag").int64_list().value(0)); } +TEST(GetFeatureValuesInt64Test, WritesASingleValueToFeature) { + Feature feature; + + GetFeatureValues(&feature)->Add(42); + + ASSERT_EQ(1, feature.int64_list().value_size()); + EXPECT_EQ(42, feature.int64_list().value(0)); +} + TEST(GetFeatureValuesInt64Test, CheckUntypedFieldExistence) { Example example; - - EXPECT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(0); - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); } TEST(GetFeatureValuesInt64Test, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(3.14); - ASSERT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(42); - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_EQ(42, tag_ro.Get(0)); @@ -87,6 +104,16 @@ TEST(GetFeatureValuesInt64Test, CopyIterableToAField) { EXPECT_EQ(3, tag_ro.Get(2)); } +TEST(GetFeatureValuesFloatTest, ReadsASingleValueFromFeature) { + Feature feature; + feature.mutable_float_list()->add_value(3.14); + + auto values = GetFeatureValues(feature); + + ASSERT_EQ(1, values.size()); + EXPECT_NEAR(3.14, values.Get(0), kTolerance); +} + TEST(GetFeatureValuesFloatTest, ReadsASingleValue) { Example example; (*example.mutable_features()->mutable_feature())["tag"] @@ -99,6 +126,15 @@ TEST(GetFeatureValuesFloatTest, ReadsASingleValue) { EXPECT_NEAR(3.14, tag.Get(0), kTolerance); } +TEST(GetFeatureValuesFloatTest, WritesASingleValueToFeature) { + Feature feature; + + GetFeatureValues(&feature)->Add(3.14); + + ASSERT_EQ(1, feature.float_list().value_size()); + EXPECT_NEAR(3.14, feature.float_list().value(0), kTolerance); +} + TEST(GetFeatureValuesFloatTest, WritesASingleValue) { Example example; @@ -114,6 +150,20 @@ TEST(GetFeatureValuesFloatTest, WritesASingleValue) { TEST(GetFeatureValuesFloatTest, CheckTypedFieldExistence) { Example example; + GetFeatureValues("tag", &example)->Add(42); + ASSERT_FALSE(HasFeature("tag", example)); + + GetFeatureValues("tag", &example)->Add(3.14); + + EXPECT_TRUE(HasFeature("tag", example)); + auto tag_ro = GetFeatureValues("tag", example); + ASSERT_EQ(1, tag_ro.size()); + EXPECT_NEAR(3.14, tag_ro.Get(0), kTolerance); +} + +TEST(GetFeatureValuesFloatTest, CheckTypedFieldExistenceForDeprecatedMethod) { + Example example; + GetFeatureValues("tag", &example)->Add(42); ASSERT_FALSE(ExampleHasFeature("tag", example)); @@ -125,6 +175,16 @@ TEST(GetFeatureValuesFloatTest, CheckTypedFieldExistence) { EXPECT_NEAR(3.14, tag_ro.Get(0), kTolerance); } +TEST(GetFeatureValuesStringTest, ReadsASingleValueFromFeature) { + Feature feature; + feature.mutable_bytes_list()->add_value("FOO"); + + auto values = GetFeatureValues(feature); + + ASSERT_EQ(1, values.size()); + EXPECT_EQ("FOO", values.Get(0)); +} + TEST(GetFeatureValuesStringTest, ReadsASingleValue) { Example example; (*example.mutable_features()->mutable_feature())["tag"] @@ -137,6 +197,15 @@ TEST(GetFeatureValuesStringTest, ReadsASingleValue) { EXPECT_EQ("FOO", tag.Get(0)); } +TEST(GetFeatureValuesStringTest, WritesASingleValueToFeature) { + Feature feature; + + *GetFeatureValues(&feature)->Add() = "FOO"; + + ASSERT_EQ(1, feature.bytes_list().value_size()); + EXPECT_EQ("FOO", feature.bytes_list().value(0)); +} + TEST(GetFeatureValuesStringTest, WritesASingleValue) { Example example; @@ -148,15 +217,15 @@ TEST(GetFeatureValuesStringTest, WritesASingleValue) { example.features().feature().at("tag").bytes_list().value(0)); } -TEST(GetFeatureValuesBytesTest, CheckTypedFieldExistence) { +TEST(GetFeatureValuesStringTest, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(42); - ASSERT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); *GetFeatureValues("tag", &example)->Add() = "FOO"; - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_EQ("FOO", tag_ro.Get(0)); @@ -228,5 +297,146 @@ TEST(AppendFeatureValuesTest, StringVariablesUsingInitializerList) { EXPECT_EQ("BAZ", tag_ro.Get(2)); } +TEST(SequenceExampleTest, ReadsASingleValueFromContext) { + SequenceExample se; + (*se.mutable_context()->mutable_feature())["tag"] + .mutable_int64_list() + ->add_value(42); + + auto values = GetFeatureValues("tag", se.context()); + + ASSERT_EQ(1, values.size()); + EXPECT_EQ(42, values.Get(0)); +} + +TEST(SequenceExampleTest, WritesASingleValueToContext) { + SequenceExample se; + + GetFeatureValues("tag", se.mutable_context())->Add(42); + + ASSERT_EQ(1, se.context().feature().at("tag").int64_list().value_size()); + EXPECT_EQ(42, se.context().feature().at("tag").int64_list().value(0)); +} + +TEST(SequenceExampleTest, AppendFeatureValuesToContextSingleArg) { + SequenceExample se; + + AppendFeatureValues({1.1, 2.2, 3.3}, "tag", se.mutable_context()); + + auto tag_ro = GetFeatureValues("tag", se.context()); + ASSERT_EQ(3, tag_ro.size()); + EXPECT_NEAR(1.1, tag_ro.Get(0), kTolerance); + EXPECT_NEAR(2.2, tag_ro.Get(1), kTolerance); + EXPECT_NEAR(3.3, tag_ro.Get(2), kTolerance); +} + +TEST(SequenceExampleTest, CheckTypedFieldExistence) { + SequenceExample se; + + GetFeatureValues("tag", se.mutable_context())->Add(3.14); + ASSERT_FALSE(HasFeature("tag", se.context())); + + GetFeatureValues("tag", se.mutable_context())->Add(42); + + EXPECT_TRUE(HasFeature("tag", se.context())); + auto tag_ro = GetFeatureValues("tag", se.context()); + ASSERT_EQ(1, tag_ro.size()); + EXPECT_EQ(42, tag_ro.Get(0)); +} + +TEST(SequenceExampleTest, ReturnsExistingFeatureLists) { + SequenceExample se; + (*se.mutable_feature_lists()->mutable_feature_list())["tag"] + .mutable_feature() + ->Add(); + + auto feature = GetFeatureList("tag", se); + + ASSERT_EQ(1, feature.size()); +} + +TEST(SequenceExampleTest, CreatesNewFeatureLists) { + SequenceExample se; + + GetFeatureList("tag", &se)->Add(); + + EXPECT_EQ(1, se.feature_lists().feature_list().at("tag").feature_size()); +} + +TEST(SequenceExampleTest, CheckFeatureListExistence) { + SequenceExample se; + ASSERT_FALSE(HasFeatureList("tag", se)); + + GetFeatureList("tag", &se)->Add(); + + ASSERT_TRUE(HasFeatureList("tag", se)); +} + +TEST(SequenceExampleTest, AppendFeatureValuesWithInitializerList) { + SequenceExample se; + + AppendFeatureValues({1, 2, 3}, "ids", se.mutable_context()); + AppendFeatureValues({"cam1-0", "cam2-0"}, + GetFeatureList("images", &se)->Add()); + AppendFeatureValues({"cam1-1", "cam2-2"}, + GetFeatureList("images", &se)->Add()); + + EXPECT_EQ(se.DebugString(), + "context {\n" + " feature {\n" + " key: \"ids\"\n" + " value {\n" + " int64_list {\n" + " value: 1\n" + " value: 2\n" + " value: 3\n" + " }\n" + " }\n" + " }\n" + "}\n" + "feature_lists {\n" + " feature_list {\n" + " key: \"images\"\n" + " value {\n" + " feature {\n" + " bytes_list {\n" + " value: \"cam1-0\"\n" + " value: \"cam2-0\"\n" + " }\n" + " }\n" + " feature {\n" + " bytes_list {\n" + " value: \"cam1-1\"\n" + " value: \"cam2-2\"\n" + " }\n" + " }\n" + " }\n" + " }\n" + "}\n"); +} + +TEST(SequenceExampleTest, AppendFeatureValuesWithVectors) { + SequenceExample se; + + std::vector readings{1.0, 2.5, 5.0}; + AppendFeatureValues(readings, GetFeatureList("movie_ratings", &se)->Add()); + + EXPECT_EQ(se.DebugString(), + "feature_lists {\n" + " feature_list {\n" + " key: \"movie_ratings\"\n" + " value {\n" + " feature {\n" + " float_list {\n" + " value: 1\n" + " value: 2.5\n" + " value: 5\n" + " }\n" + " }\n" + " }\n" + " }\n" + "}\n"); +} + } // namespace } // namespace tensorflow -- GitLab From d6ec67f64c6d9caa0567c793dfea7b3a405d685f Mon Sep 17 00:00:00 2001 From: Louie Helm Date: Sat, 2 Sep 2017 12:21:08 -0700 Subject: [PATCH 187/294] Avoid gcc7 memcpy build error by updating BoringSSL (#12752) * Avoid gcc7 memcpy build error by updating BoringSSL * Harmonize cmake for BoringSSL for windows build --- tensorflow/contrib/cmake/external/boringssl.cmake | 2 +- tensorflow/workspace.bzl | 8 ++++---- third_party/boringssl/add_boringssl_s390x.patch | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake index 04a9664701..dc27eadaca 100644 --- a/tensorflow/contrib/cmake/external/boringssl.cmake +++ b/tensorflow/contrib/cmake/external/boringssl.cmake @@ -17,7 +17,7 @@ include (ExternalProject) set(boringssl_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src/boringssl/include) #set(boringssl_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src) set(boringssl_URL https://boringssl.googlesource.com/boringssl) -set(boringssl_TAG 17cf2cb1d226b0ba2401304242df7ddd3b6f1ff2) +set(boringssl_TAG ee7aa02) set(boringssl_BUILD ${CMAKE_BINARY_DIR}/boringssl/src/boringssl-build) #set(boringssl_LIBRARIES ${boringssl_BUILD}/obj/so/libboringssl.so) set(boringssl_STATIC_LIBRARIES diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2c7acd809a..868b5c6e42 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -573,11 +573,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): patched_http_archive( name = "boringssl", urls = [ - "http://mirror.bazel.build/github.com/google/boringssl/archive/bbcaa15b0647816b9a1a9b9e0d209cd6712f0105.tar.gz", - "https://github.com/google/boringssl/archive/bbcaa15b0647816b9a1a9b9e0d209cd6712f0105.tar.gz", # 2016-07-11 + "http://mirror.bazel.build/github.com/google/boringssl/archive/e3860009a091cd1bd2bc189cdbc3c6d095abde84.tar.gz", + "https://github.com/google/boringssl/archive/e3860009a091cd1bd2bc189cdbc3c6d095abde84.tar.gz", # 2017-07-07 ], - sha256 = "025264d6e9a7ad371f2f66d17a28b6627de0c9592dc2eb54afd062f68f1f9aa3", - strip_prefix = "boringssl-bbcaa15b0647816b9a1a9b9e0d209cd6712f0105", + sha256 = "02f5950f93c4fd3691771c07c9d04cf2999ab01383ff99da345249e93b0fcfb2", + strip_prefix = "boringssl-e3860009a091cd1bd2bc189cdbc3c6d095abde84", # Add patch to boringssl code to support s390x patch_file = str(Label("//third_party/boringssl:add_boringssl_s390x.patch")), ) diff --git a/third_party/boringssl/add_boringssl_s390x.patch b/third_party/boringssl/add_boringssl_s390x.patch index 9a34a59a1d..8b42d10e68 100644 --- a/third_party/boringssl/add_boringssl_s390x.patch +++ b/third_party/boringssl/add_boringssl_s390x.patch @@ -3,9 +3,9 @@ index 7a3adfb..88012ad 100644 --- a/src/include/openssl/base.h +++ b/src/include/openssl/base.h @@ -94,6 +94,8 @@ extern "C" { - #elif defined(__pnacl__) - #define OPENSSL_32_BIT #define OPENSSL_PNACL + #elif defined(__myriad2__) + #define OPENSSL_32_BIT +#elif defined(__s390x__) +#define OPENSSL_64_BIT #else -- GitLab From ddba1e0aadabe26063a28c5d1c48e2cfce44e30f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 2 Sep 2017 14:35:12 -0700 Subject: [PATCH 188/294] Replace CHECKs in v1 checkpoint loading codepath with returning errors. PiperOrigin-RevId: 167392822 --- tensorflow/core/kernels/save_restore_tensor.cc | 9 ++++++--- tensorflow/core/util/tensor_slice_reader.h | 17 +++++++++++------ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/save_restore_tensor.cc b/tensorflow/core/kernels/save_restore_tensor.cc index 80d4901740..6b06cf650a 100644 --- a/tensorflow/core/kernels/save_restore_tensor.cc +++ b/tensorflow/core/kernels/save_restore_tensor.cc @@ -216,9 +216,12 @@ void RestoreTensor(OpKernelContext* context, if (output_shape.num_elements() == 0) return; -#define READER_COPY(T) \ - case DataTypeToEnum::value: \ - reader->CopySliceData(tensor_name, slice_to_load, t->flat().data()); \ +#define READER_COPY(T) \ + case DataTypeToEnum::value: \ + OP_REQUIRES(context, \ + reader->CopySliceData(tensor_name, slice_to_load, \ + t->flat().data()), \ + errors::InvalidArgument("Error copying slice data")); \ break; switch (type) { diff --git a/tensorflow/core/util/tensor_slice_reader.h b/tensorflow/core/util/tensor_slice_reader.h index eeb3129573..5932d59a15 100644 --- a/tensorflow/core/util/tensor_slice_reader.h +++ b/tensorflow/core/util/tensor_slice_reader.h @@ -165,13 +165,18 @@ bool TensorSliceReader::CopySliceData(const string& name, CHECK_GE(idx, 0) << "Failed to find the index for filename " << fname; // We read a record in the corresponding sstable const string key = EncodeTensorNameSlice(name, slice_s); - CHECK(sss_[idx]->Get(key, &value)) - << "Failed to seek to the record for tensor " << name << ", slice " - << slice_s.DebugString() << ": computed key = " << key; + if (!sss_[idx]->Get(key, &value)) { + VLOG(1) << "Failed to seek to the record for tensor " << name + << ", slice " << slice_s.DebugString() + << ": computed key = " << key; + return false; + } SavedTensorSlices sts; - CHECK(ParseProtoUnlimited(&sts, value)) - << "Failed to parse the record for tensor " << name << ", slice " - << slice_s.DebugString() << ": computed key = " << key; + if (!ParseProtoUnlimited(&sts, value)) { + VLOG(1) << "Failed to parse the record for tensor " << name << ", slice " + << slice_s.DebugString() << ": computed key = " << key; + return false; + } CopyDataFromTensorSliceToTensorSlice( tss->shape(), slice_s, slice, checkpoint::TensorProtoData(sts.data().data()), data); -- GitLab From 0e87dfd2be81b2f7f5c805e6490d0d2938058af9 Mon Sep 17 00:00:00 2001 From: MtDersvan Date: Sat, 2 Sep 2017 15:39:17 -0700 Subject: [PATCH 189/294] Formating typo --- tensorflow/contrib/data/python/ops/dataset_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index 46af2b1949..37dc6b4469 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -2460,7 +2460,7 @@ def rejection_resample(dataset, shapes and types defined by `dataset.output_shapes` and `dataset.output_types`) to a scalar `tf.int32` tensor. Values should be in `[0, num_classes)`. - target_dist: A floating point type tensor, shaped `[num_classes]. + target_dist: A floating point type tensor, shaped `[num_classes]`. initial_dist: (Optional.) A floating point type tensor, shaped `[num_classes]`. If not provided, the true class distribution is estimated live in a streaming fashion. -- GitLab From f3a70b8c3c11f92792d500bb25fc92bd02a4303d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 28 Aug 2017 09:59:10 +0800 Subject: [PATCH 190/294] TST: add unit test --- tensorflow/python/feature_column/feature_column_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 5138f31e98..c5310777fd 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -3213,12 +3213,13 @@ class IndicatorColumnTest(test.TestCase): weights = fc.weighted_categorical_column(ids, 'weights') indicator = fc.indicator_column(weights) features = { - 'ids': constant_op.constant(['c', 'b', 'a'], shape=(1, 3)), - 'weights': constant_op.constant([2., 4., 6.], shape=(1, 3)) + # Github issue 12583 + 'ids': constant_op.constant([['c', 'b', 'unknown']]), + 'weights': constant_op.constant([[2., 4., 6.]]) } indicator_tensor = _transform_features(features, [indicator])[indicator] with _initialized_session(): - self.assertAllEqual([[6., 4., 2.]], indicator_tensor.eval()) + self.assertAllClose([[0., 4., 2.]], indicator_tensor.eval()) def test_linear_model(self): animal = fc.indicator_column( -- GitLab From 1dab44ca2ca500170f4575202f387b44081f92f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 28 Aug 2017 10:05:12 +0800 Subject: [PATCH 191/294] BUG: fix, indices is out of bounds --- tensorflow/python/feature_column/feature_column.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index a8434d0c99..f64235d70b 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -2474,6 +2474,11 @@ class _IndicatorColumn(_DenseColumn, sp_ids=id_tensor, sp_values=weight_tensor, vocab_size=int(self._variable_shape[-1])) + # Remove (?, -1) index + weighted_column = sparse_ops.sparse_slice( + weighted_column, + [0, 0], + weighted_column.dense_shape) return sparse_ops.sparse_tensor_to_dense(weighted_column) dense_id_tensor = sparse_ops.sparse_tensor_to_dense( -- GitLab From ef4b1ded94c55bc77ca5a9cbee2ff8212f73d773 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 30 Aug 2017 12:16:46 +0800 Subject: [PATCH 192/294] Revert "TST: add unit test" This reverts commit fe9910b6a4a0425a987d39ffa1b817d364d2e1cc. --- tensorflow/python/feature_column/feature_column_test.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index c5310777fd..5138f31e98 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -3213,13 +3213,12 @@ class IndicatorColumnTest(test.TestCase): weights = fc.weighted_categorical_column(ids, 'weights') indicator = fc.indicator_column(weights) features = { - # Github issue 12583 - 'ids': constant_op.constant([['c', 'b', 'unknown']]), - 'weights': constant_op.constant([[2., 4., 6.]]) + 'ids': constant_op.constant(['c', 'b', 'a'], shape=(1, 3)), + 'weights': constant_op.constant([2., 4., 6.], shape=(1, 3)) } indicator_tensor = _transform_features(features, [indicator])[indicator] with _initialized_session(): - self.assertAllClose([[0., 4., 2.]], indicator_tensor.eval()) + self.assertAllEqual([[6., 4., 2.]], indicator_tensor.eval()) def test_linear_model(self): animal = fc.indicator_column( -- GitLab From 3c8edb8044671196c73a149ae2e535ae706c3400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 30 Aug 2017 12:21:51 +0800 Subject: [PATCH 193/294] TST: remove shape arg --- tensorflow/python/feature_column/feature_column_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 5138f31e98..0bde1cc356 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -3213,8 +3213,8 @@ class IndicatorColumnTest(test.TestCase): weights = fc.weighted_categorical_column(ids, 'weights') indicator = fc.indicator_column(weights) features = { - 'ids': constant_op.constant(['c', 'b', 'a'], shape=(1, 3)), - 'weights': constant_op.constant([2., 4., 6.], shape=(1, 3)) + 'ids': constant_op.constant([['c', 'b', 'a']]), + 'weights': constant_op.constant([[2., 4., 6.]]) } indicator_tensor = _transform_features(features, [indicator])[indicator] with _initialized_session(): -- GitLab From 7bcb478cd047d01535870be0bb07dcf197d655f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 30 Aug 2017 12:22:18 +0800 Subject: [PATCH 194/294] TST: add test, missing value in weighted column --- .../python/feature_column/feature_column_test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 0bde1cc356..8fd5614759 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -3220,6 +3220,20 @@ class IndicatorColumnTest(test.TestCase): with _initialized_session(): self.assertAllEqual([[6., 4., 2.]], indicator_tensor.eval()) + def test_transform_with_missing_value_in_weighted_column(self): + # Github issue 12583 + ids = fc.categorical_column_with_vocabulary_list( + key='ids', vocabulary_list=('a', 'b', 'c')) + weights = fc.weighted_categorical_column(ids, 'weights') + indicator = fc.indicator_column(weights) + features = { + 'ids': constant_op.constant([['c', 'b', 'unknown']]), + 'weights': constant_op.constant([[2., 4., 6.]]) + } + indicator_tensor = _transform_features(features, [indicator])[indicator] + with _initialized_session(): + self.assertAllEqual([[0., 4., 2.]], indicator_tensor.eval()) + def test_linear_model(self): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) -- GitLab From 4e5310f8a69a7d3d7301550d518aa07231388de1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 30 Aug 2017 12:24:48 +0800 Subject: [PATCH 195/294] TST: add test, missing value in category column --- .../python/feature_column/feature_column_test.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 8fd5614759..e707770f8a 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -3234,6 +3234,18 @@ class IndicatorColumnTest(test.TestCase): with _initialized_session(): self.assertAllEqual([[0., 4., 2.]], indicator_tensor.eval()) + def test_transform_with_missing_value_in_categorical_column(self): + # Github issue 12583 + ids = fc.categorical_column_with_vocabulary_list( + key='ids', vocabulary_list=('a', 'b', 'c')) + indicator = fc.indicator_column(ids) + features = { + 'ids': constant_op.constant([['c', 'b', 'unknown']]), + } + indicator_tensor = _transform_features(features, [indicator])[indicator] + with _initialized_session(): + self.assertAllEqual([[0., 1., 1.]], indicator_tensor.eval()) + def test_linear_model(self): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) -- GitLab From 1b42dbb0fbe5f3d172cb607d7c468c481dd880d5 Mon Sep 17 00:00:00 2001 From: Anton Daitche Date: Sun, 27 Aug 2017 15:39:34 +0200 Subject: [PATCH 196/294] Make Dataset.map not unpack namedtuple --- .../kernel_tests/map_dataset_op_test.py | 35 +++++++++++++++++++ .../contrib/data/python/ops/dataset_ops.py | 3 +- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py index 97b4ec44fc..d05fbb7d28 100644 --- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import os import threading +from collections import namedtuple import numpy as np @@ -481,6 +482,40 @@ class MapDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testMapNamedtuple(self, count=10): + # construct dataset of tuples + labels = dataset_ops.Dataset.range(count) + images = labels.map(lambda l: -l) + dataset_tuple = dataset_ops.Dataset.zip((labels, images)) + + # convert dataset of tuples to dataset of namedtuples + Example = namedtuple("Example", ["label", "image"]) + dataset_namedtuple = dataset_tuple.map(Example) + + def preprocess_tuple(label, image): + image = 2 * image + return label, image + + def preprocess_namedtuple(example): + return example._replace(image=2 * example.image) + + # preprocess both datasets + dataset_tuple = dataset_tuple.map(preprocess_tuple) + dataset_namedtuple = dataset_namedtuple.map(preprocess_namedtuple) + + next_tuple = dataset_tuple.make_one_shot_iterator().get_next() + next_namedtuple = dataset_namedtuple.make_one_shot_iterator().get_next() + + # make sure both datasets contain the same data + with self.test_session() as sess: + for i in range(count): + tuple_, namedtuple_ = sess.run([next_tuple, next_namedtuple]) + self.assertEqual(tuple_, namedtuple_) + self.assertEqual(tuple_, (i, -2 * i)) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_namedtuple) + def testUseStepContainerInMap(self): row = np.arange(6) iterator = ( diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index 37dc6b4469..1793565867 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -1903,7 +1903,8 @@ class DenseToSparseBatchDataset(Dataset): def _should_unpack_args(args): """Returns `True` if `args` should be `*args` when passed to a callable.""" - return nest.is_sequence(args) and not isinstance(args, dict) + is_namedtuple = isinstance(args, tuple) and hasattr(args, "_fields") + return nest.is_sequence(args) and not isinstance(args, dict) and not is_namedtuple class _ResourceDataset(Dataset): -- GitLab From e8a91fdaf055a3566c4c75f700aa09bbc4c91779 Mon Sep 17 00:00:00 2001 From: Anton Daitche Date: Mon, 28 Aug 2017 16:28:10 +0200 Subject: [PATCH 197/294] Simplify _should_unpack_args() --- tensorflow/contrib/data/python/ops/dataset_ops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index 1793565867..ff09a3f7ff 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -1903,8 +1903,7 @@ class DenseToSparseBatchDataset(Dataset): def _should_unpack_args(args): """Returns `True` if `args` should be `*args` when passed to a callable.""" - is_namedtuple = isinstance(args, tuple) and hasattr(args, "_fields") - return nest.is_sequence(args) and not isinstance(args, dict) and not is_namedtuple + return type(args) is tuple # pylint: disable=unidiomatic-typecheck class _ResourceDataset(Dataset): -- GitLab From e50d1caaa919db7ef47eaf4c425733f9784f7cc2 Mon Sep 17 00:00:00 2001 From: Anton Daitche Date: Mon, 28 Aug 2017 18:07:19 +0200 Subject: [PATCH 198/294] Change Dataset.InterleaveDataset to use _should_unpack_args Previously it used nest.is_sequence, which was inconsistent with the other Dataset classes. --- tensorflow/contrib/data/python/ops/dataset_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index ff09a3f7ff..20e564b8b7 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -2151,7 +2151,7 @@ class InterleaveDataset(Dataset): nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - if nest.is_sequence(nested_args): + if _should_unpack_args(nested_args): dataset = map_func(*nested_args) else: dataset = map_func(nested_args) -- GitLab From d57572e996dce24abf4d9cf6ea04e7104b3d743b Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Sat, 2 Sep 2017 19:21:45 -0700 Subject: [PATCH 199/294] Merge changes from github. PiperOrigin-RevId: 167401527 --- README.md | 8 ++++++ RELEASE.md | 2 +- WORKSPACE | 8 +++--- configure.py | 10 +++---- tensorflow/cc/gradients/nn_grad.cc | 8 ++++++ tensorflow/cc/gradients/nn_grad_test.cc | 8 ++++++ .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 2 +- tensorflow/contrib/cmake/tf_tests.cmake | 2 ++ tensorflow/contrib/gdr/BUILD | 1 + tensorflow/contrib/gdr/gdr_memory_manager.h | 7 +---- tensorflow/contrib/layers/__init__.py | 1 + .../layers/python/layers/optimizers.py | 5 ++-- .../python/learn/learn_io/data_feeder.py | 26 ++++++++++-------- tensorflow/contrib/makefile/Makefile | 12 ++++----- tensorflow/contrib/makefile/README.md | 3 ++- .../base_rendezvous_mgr.cc | 27 ++++++++++--------- tensorflow/core/framework/op_kernel.h | 2 +- tensorflow/core/profiler/g3doc/advise.md | 4 +-- .../core/profiler/g3doc/command_line.md | 6 ++--- tensorflow/core/profiler/g3doc/options.md | 8 +++--- .../core/profiler/g3doc/profile_memory.md | 2 +- .../g3doc/profile_model_architecture.md | 8 +++--- .../core/profiler/g3doc/profile_time.md | 12 ++++----- tensorflow/docs_src/install/install_linux.md | 6 ++--- .../docs_src/install/install_windows.md | 6 +++-- tensorflow/examples/speech_commands/README.md | 2 +- .../python/feature_column/feature_column.py | 2 +- .../feature_column/feature_column_test.py | 14 ++++++++++ .../stream_executor/device_description.h | 4 +-- tensorflow/stream_executor/kernel.h | 2 +- tensorflow/tools/ci_build/update_version.py | 5 ++-- tensorflow/tools/pip_package/BUILD | 1 + third_party/sqlite.BUILD | 4 +-- 33 files changed, 134 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index 87c7b1bfa9..5a0739a603 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,15 @@ and discussion, and please direct specific questions to [Stack Overflow](https:/ People who are a little more adventurous can also try our nightly binaries: +**Nightly pip packages** +* We are pleased to announce that TensorFlow now offers nightly pip packages +under the [tf-nightly](https://pypi.python.org/pypi/tf-nightly) project on pypi. +Simply run `pip install tf-nightly` in a clean environment to install the nightly +tensorflow build. We currently only support CPU-only packages on Linux and Mac. +GPU packages on all platforms and Windows CPU-only packages will arrive soon! + +**Individual whl files** * Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/)) * Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow_gpu-1.3.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow_gpu-1.3.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow_gpu-1.3.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/)) * Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/)) diff --git a/RELEASE.md b/RELEASE.md index d120f068ca..3d497dbaa9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -46,7 +46,7 @@ See also [TensorBoard 0.1.4](https://github.com/tensorflow/tensorboard/releases/ * Display feed values with the `print_feed` or `pf` command and clickable links in the curses UI. * Runtime profiler at the op level and the Python source line level with the `run -p` command. * Initial release of the statistical distribution library `tf.distributions`. -* GPU kernels and speed improvements for for unary `tf.where` and `tf.nn.top_k`. +* GPU kernels and speed improvements for unary `tf.where` and `tf.nn.top_k`. * Monotonic Attention wrappers added to `tf.contrib.seq2seq`. * Added `tf.contrib.signal`, a library for signal processing primitives. * Added `tf.contrib.resampler`, containing CPU and GPU ops for differentiable resampling of images. diff --git a/WORKSPACE b/WORKSPACE index 5e9b991fcc..a0fe67bf31 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -2,11 +2,11 @@ workspace(name = "org_tensorflow") http_archive( name = "io_bazel_rules_closure", - sha256 = "bc41b80486413aaa551860fc37471dbc0666e1dbb5236fb6177cb83b0c105846", - strip_prefix = "rules_closure-dec425a4ff3faf09a56c85d082e4eed05d8ce38f", + sha256 = "25f5399f18d8bf9ce435f85c6bbf671ec4820bc4396b3022cc5dc4bc66303609", + strip_prefix = "rules_closure-0.4.2", urls = [ - "http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/dec425a4ff3faf09a56c85d082e4eed05d8ce38f.tar.gz", # 2017-06-02 - "https://github.com/bazelbuild/rules_closure/archive/dec425a4ff3faf09a56c85d082e4eed05d8ce38f.tar.gz", + "http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/0.4.2.tar.gz", # 2017-08-29 + "https://github.com/bazelbuild/rules_closure/archive/0.4.2.tar.gz", ], ) diff --git a/configure.py b/configure.py index 1a0f71ed94..186fdc9ddc 100644 --- a/configure.py +++ b/configure.py @@ -143,7 +143,7 @@ def run_shell(cmd, allow_non_zero=False): def cygpath(path): """Convert path from posix to windows.""" - return run_shell(['cygpath', '-m', path]) + return os.path.abspath(path).replace('\\', '/') def get_python_path(environ_cp, python_bin_path): @@ -196,7 +196,7 @@ def setup_python(environ_cp, bazel_version): environ_cp['PYTHON_BIN_PATH'] = '' # Convert python path to Windows style before checking lib and version - if is_cygwin(): + if is_windows() or is_cygwin(): python_bin_path = cygpath(python_bin_path) # Get PYTHON_LIB_PATH @@ -219,7 +219,7 @@ def setup_python(environ_cp, bazel_version): python_major_version = get_python_major_version(python_bin_path) # Convert python path to Windows style before writing into bazel.rc - if is_cygwin(): + if is_windows() or is_cygwin(): python_lib_path = cygpath(python_lib_path) # Set-up env variables used by python_configure.bzl @@ -600,7 +600,7 @@ def set_tf_cuda_version(environ_cp): # Find out where the CUDA toolkit is installed default_cuda_path = _DEFAULT_CUDA_PATH - if is_cygwin(): + if is_windows() or is_cygwin(): default_cuda_path = cygpath( environ_cp.get('CUDA_PATH', _DEFAULT_CUDA_PATH_WIN)) elif is_linux(): @@ -660,7 +660,7 @@ def set_tf_cunn_version(environ_cp): # unusable. Going through one more level of expansion to handle that. cudnn_install_path = os.path.realpath( os.path.expanduser(cudnn_install_path)) - if is_cygwin(): + if is_windows() or is_cygwin(): cudnn_install_path = cygpath(cudnn_install_path) if is_windows(): diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 6fc73c3fa1..ccb58e7f91 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -95,6 +95,14 @@ Status SeluGradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("Selu", SeluGradHelper); +Status L2LossGrad(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + grad_outputs->push_back(Mul(scope, op.input(0), grad_inputs[0])); + return scope.status(); +} +REGISTER_GRADIENT_OP("L2Loss", L2LossGrad); + Status BiasAddGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index f9a512f29e..affc1e1dbe 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -122,6 +122,14 @@ TEST_F(NNGradTest, SeluGrad) { RunTest(x, x_init_value, y, shape); } +TEST_F(NNGradTest, L2LossGrad) { + TensorShape x_shape({5, 2}); + TensorShape y_shape({1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto y = L2Loss(scope_, x); + RunTest(x, x_shape, y, y_shape); +} + TEST_F(NNGradTest, BiasAddGradHelper) { TensorShape shape({4, 5}); TensorShape bias_shape({5}); diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 2a999f52f0..2e7765c4c6 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -389,7 +389,7 @@ StatusOr CompileModuleToPtx(llvm::Module* module, // Loop unrolling exposes more opportunities for SROA. Therefore, we run SROA // again after the standard optimization passes [http://b/13329423]. - // TODO(jingyue): SROA may further expose more optimization opportunities, such + // TODO(jingyue): SROA may further expose more optimization opportunities such // as more precise alias analysis and more function inlining (SROA may change // the inlining cost of a function). For now, running SROA already emits good // enough code for the evaluated benchmarks. We may want to run more diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 6507a9a5e0..15850bf0a4 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -82,6 +82,7 @@ function(AddTest) set_tests_properties(${_AT_TARGET} PROPERTIES ENVIRONMENT "TEST_TMPDIR=${tempdir};TEST_SRCDIR=${testdir}" ) + set_tests_properties(${_AT_TARGET} PROPERTIES TIMEOUT "600") foreach(datafile ${_AT_DATA}) file(RELATIVE_PATH datafile_rel ${tensorflow_source_dir} ${datafile}) @@ -117,6 +118,7 @@ function(AddPythonTests) if (_AT_DEPENDS) add_dependencies(${_AT_TARGET} ${_AT_DEPENDS}) endif() + set_tests_properties(${sourcefile} PROPERTIES TIMEOUT "600") endforeach() endfunction(AddPythonTests) diff --git a/tensorflow/contrib/gdr/BUILD b/tensorflow/contrib/gdr/BUILD index 645e364d19..bebcf079ba 100644 --- a/tensorflow/contrib/gdr/BUILD +++ b/tensorflow/contrib/gdr/BUILD @@ -62,6 +62,7 @@ tf_cuda_library( }), deps = [ ":gdr_proto_cc", + "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:gpu_runtime", "//tensorflow/core:lib", diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.h b/tensorflow/contrib/gdr/gdr_memory_manager.h index 7e9fe01e97..e0e2a3f624 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.h +++ b/tensorflow/contrib/gdr/gdr_memory_manager.h @@ -16,14 +16,9 @@ limitations under the License. #ifndef GDR_MEMORY_MANAGER_H_ #define GDR_MEMORY_MANAGER_H_ +#include "google/protobuf/any.pb.h" #include "tensorflow/core/lib/core/status.h" -namespace google { -namespace protobuf { -class Any; -} -} - namespace tensorflow { class Device; diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py index 674f5db107..ea8d9e0c63 100644 --- a/tensorflow/contrib/layers/__init__.py +++ b/tensorflow/contrib/layers/__init__.py @@ -115,6 +115,7 @@ _allowed_symbols = ['bias_add', 'legacy_linear', 'legacy_relu', 'OPTIMIZER_CLS_NAMES', + 'OPTIMIZER_SUMMARIES', 'regression_target', 'SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY', 'summaries'] diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py index ac217f043f..7eb410b4c7 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers.py +++ b/tensorflow/contrib/layers/python/layers/optimizers.py @@ -129,8 +129,9 @@ def optimize_loss(loss, `None` to use all trainable variables. name: The name for this operation is used to scope operations and summaries. summaries: List of internal quantities to visualize on tensorboard. If not - set only the loss and the learning rate will be reported. The - complete list is in OPTIMIZER_SUMMARIES. + set, the loss, the learning rate, and the global norm of the + gradients will be reported. The complete list of possible values + is in OPTIMIZER_SUMMARIES. colocate_gradients_with_ops: If True, try colocating gradients with the corresponding op. increment_global_step: Whether to increment `global_step`. If your model diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index 48d79ecbbf..4c50d40aaa 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -28,7 +28,6 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -44,7 +43,7 @@ def _get_in_out_shape(x_shape, y_shape, n_classes, batch_size=None): x_is_dict, y_is_dict = isinstance( x_shape, dict), y_shape is not None and isinstance(y_shape, dict) if y_is_dict and n_classes is not None: - assert (isinstance(n_classes, dict)) + assert isinstance(n_classes, dict) if batch_size is None: batch_size = list(x_shape.values())[0][0] if x_is_dict else x_shape[0] @@ -322,10 +321,12 @@ class DataFeeder(object): self._x = dict([(k, check_array(v, v.dtype)) for k, v in list(x.items()) ]) if x_is_dict else check_array(x, x.dtype) - self._y = None if y is None else \ - dict([(k, check_array(v, v.dtype)) for k, v in list(y.items())]) if x_is_dict else check_array(y, y.dtype) + self._y = None if y is None else ( + dict([(k, check_array(v, v.dtype)) for k, v in list(y.items())]) + if y_is_dict else check_array(y, y.dtype)) - # self.n_classes is not None means we're converting raw target indices to one-hot. + # self.n_classes is not None means we're converting raw target indices + # to one-hot. if n_classes is not None: if not y_is_dict: y_dtype = (np.int64 @@ -344,12 +345,15 @@ class DataFeeder(object): x_shape, y_shape, n_classes, batch_size) # Input dtype matches dtype of x. - self._input_dtype = dict([(k, _check_dtype(v.dtype)) for k, v in list(self._x.items())]) if x_is_dict \ - else _check_dtype(self._x.dtype) - - # note: self._output_dtype = np.float32 when y is None - self._output_dtype = dict([(k, _check_dtype(v.dtype)) for k, v in list(self._y.items())]) if y_is_dict \ - else _check_dtype(self._y.dtype) if y is not None else np.float32 + self._input_dtype = ( + dict([(k, _check_dtype(v.dtype)) for k, v in list(self._x.items())]) + if x_is_dict else _check_dtype(self._x.dtype)) + + # self._output_dtype == np.float32 when y is None + self._output_dtype = ( + dict([(k, _check_dtype(v.dtype)) for k, v in list(self._y.items())]) + if y_is_dict else ( + _check_dtype(self._y.dtype) if y is not None else np.float32)) # self.n_classes is None means we're passing in raw target indices if n_classes is not None and y_is_dict: diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 98af47d728..30897bb202 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -316,14 +316,14 @@ ifeq ($(TARGET),IOS) IPHONESIMULATOR_SYSROOT := $(shell xcrun --sdk iphonesimulator \ --show-sdk-path) IOS_SDK_VERSION := $(shell xcrun --sdk iphoneos --show-sdk-version) - MIN_SDK_VERSION := 8.0 + MIN_SDK_VERSION := 9.0 # Override IOS_ARCH with ARMV7, ARMV7S, ARM64, or I386. IOS_ARCH := X86_64 ifeq ($(IOS_ARCH),ARMV7) CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \ -arch armv7 \ -fembed-bitcode \ - -D__thread= \ + -D__thread=thread_local \ -DUSE_GEMM_FOR_CONV \ -Wno-c++11-narrowing \ -mno-thumb \ @@ -347,7 +347,7 @@ ifeq ($(TARGET),IOS) CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \ -arch armv7s \ -fembed-bitcode \ - -D__thread= \ + -D__thread=thread_local \ -DUSE_GEMM_FOR_CONV \ -Wno-c++11-narrowing \ -mno-thumb \ @@ -371,7 +371,7 @@ ifeq ($(TARGET),IOS) CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \ -arch arm64 \ -fembed-bitcode \ - -D__thread= \ + -D__thread=thread_local \ -DUSE_GEMM_FOR_CONV \ -Wno-c++11-narrowing \ -DTF_LEAN_BINARY \ @@ -395,7 +395,7 @@ ifeq ($(TARGET),IOS) -arch i386 \ -mno-sse \ -fembed-bitcode \ - -D__thread= \ + -D__thread=thread_local \ -DUSE_GEMM_FOR_CONV \ -Wno-c++11-narrowing \ -DTF_LEAN_BINARY \ @@ -418,7 +418,7 @@ ifeq ($(TARGET),IOS) CXXFLAGS += -mios-simulator-version-min=$(MIN_SDK_VERSION) \ -arch x86_64 \ -fembed-bitcode \ - -D__thread= \ + -D__thread=thread_local \ -DUSE_GEMM_FOR_CONV \ -Wno-c++11-narrowing \ -DTF_LEAN_BINARY \ diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 835d68489e..715eb51577 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -201,7 +201,8 @@ tensorflow/contrib/makefile/compile_ios_protobuf.sh Then, you will need to compile the nsync library for iOS: -```export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` +```bash +export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh` export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios` ``` diff --git a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc index cb2fde7dba..f91e377049 100644 --- a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc +++ b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc @@ -35,14 +35,18 @@ limitations under the License. namespace tensorflow { +static void StartAbortRendevous(Rendezvous* rendez, const Status& s) { + rendez->StartAbort(s); + rendez->Unref(); +} + BaseRendezvousMgr::BaseRendezvousMgr(const WorkerEnv* worker_env) : worker_env_(worker_env) {} BaseRendezvousMgr::~BaseRendezvousMgr() { for (auto& p : table_) { - BaseRemoteRendezvous* rendez = p.second; - rendez->StartAbort(errors::Aborted("Shutdown")); - rendez->Unref(); + auto rendez = p.second; + StartAbortRendevous(rendez, errors::Aborted("Shutdown")); } } @@ -52,7 +56,7 @@ RemoteRendezvous* BaseRendezvousMgr::Find(int64 step_id) { BaseRemoteRendezvous* BaseRendezvousMgr::FindOrCreate(int64 step_id) { mutex_lock l(mu_); - Table::iterator iter = table_.find(step_id); + auto iter = table_.find(step_id); if (iter == table_.end()) { auto rr = Create(step_id, worker_env_); iter = table_.insert({step_id, rr}).first; @@ -64,7 +68,7 @@ BaseRemoteRendezvous* BaseRendezvousMgr::FindOrCreate(int64 step_id) { void BaseRendezvousMgr::RecvLocalAsync(int64 step_id, const Rendezvous::ParsedKey& parsed, Rendezvous::DoneCallback done) { - BaseRemoteRendezvous* rendez = FindOrCreate(step_id); + auto rendez = FindOrCreate(step_id); using namespace std::placeholders; Rendezvous::DoneCallback done_cb = std::bind( [rendez](Rendezvous::DoneCallback done, @@ -101,15 +105,15 @@ void BaseRendezvousMgr::Cleanup(int64 step_id) { Rendezvous* rendez = nullptr; { mutex_lock l(mu_); - Table::iterator iter = table_.find(step_id); + auto iter = table_.find(step_id); if (iter != table_.end()) { rendez = iter->second; table_.erase(iter); } } - if (!rendez) return; - rendez->StartAbort(errors::Aborted("Cleanup ", step_id)); - rendez->Unref(); + if (rendez) { + StartAbortRendevous(rendez, errors::Aborted("Cleanup ", step_id)); + } } void BaseRendezvousMgr::CleanupAll() { @@ -122,8 +126,7 @@ void BaseRendezvousMgr::CleanupAll() { table_.clear(); } for (auto rendez : rendezs) { - rendez->StartAbort(errors::Aborted("Shutdown")); - rendez->Unref(); + StartAbortRendevous(rendez, errors::Aborted("Shutdown")); } } @@ -165,7 +168,7 @@ Status BaseRemoteRendezvous::Initialize(WorkerSession* session) { session_ = session; std::swap(deferred_calls, deferred_calls_); } - for (DeferredCall& call : deferred_calls) { + for (auto& call : deferred_calls) { RecvLocalAsyncInternal(call.parsed, std::move(call.done)); } return Status::OK(); diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 46c21dcef0..25b35a6dd7 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -310,7 +310,7 @@ class OpKernelConstruction { FunctionLibraryRuntime* function_library() const { return flib_; } // The GraphDef version whose behavior we should follow. - const int graph_def_version() const { return graph_def_version_; } + int graph_def_version() const { return graph_def_version_; } // Helper routines for the OP_REQUIRES macros void CtxFailure(Status s); diff --git a/tensorflow/core/profiler/g3doc/advise.md b/tensorflow/core/profiler/g3doc/advise.md index d87b0d8603..d0de8317f6 100644 --- a/tensorflow/core/profiler/g3doc/advise.md +++ b/tensorflow/core/profiler/g3doc/advise.md @@ -86,7 +86,7 @@ For example: * Checks RecvTensor RPC latency and bandwidth. * Checks CPU/Memory utilization of the job. -####AcceleratorUtilization Checker +#### AcceleratorUtilization Checker * Checks what percentage of time the accelerator spends on computation. #### OperationChecker @@ -100,7 +100,7 @@ For example: * Checks the most expensive graph nodes. * Checks the most expensive graph-building Python codes. -####Contribute Your Checker +#### Contribute Your Checker Follow examples of accelerator_utilization_checker.h diff --git a/tensorflow/core/profiler/g3doc/command_line.md b/tensorflow/core/profiler/g3doc/command_line.md index 857b5e6459..e2839a682f 100644 --- a/tensorflow/core/profiler/g3doc/command_line.md +++ b/tensorflow/core/profiler/g3doc/command_line.md @@ -51,7 +51,7 @@ It defines _checkpoint_variable op type. It also provides checkpointed tensors' Note: this feature is not well maintained now. -###Start `tfprof` +### Start `tfprof` #### Build `tfprof` @@ -140,9 +140,9 @@ tfprof> -output ``` -###Examples +### Examples -####Profile Python Time +#### Profile Python Time ```shell # Requires --graph_path --op_log_path tfprof> code -max_depth 1000 -show_name_regexes .*model_analyzer.*py.* -select micros -account_type_regexes .* -order_by micros diff --git a/tensorflow/core/profiler/g3doc/options.md b/tensorflow/core/profiler/g3doc/options.md index 15712d04c2..ddee63ad42 100644 --- a/tensorflow/core/profiler/g3doc/options.md +++ b/tensorflow/core/profiler/g3doc/options.md @@ -1,6 +1,6 @@ -##Options +## Options -###Overview +### Overview For all tfprof views, the profiles are processed with the following procedures @@ -35,7 +35,7 @@ For all tfprof views, the profiles are processed with the following procedures 4) Finally, the filtered data structure is output in a format depending on the `-output` option. -####Option Semantics In Different View +#### Option Semantics In Different View options usually have the same semantics in different views. However, some can vary. For example `-max_depth` in scope view means the depth of name scope tree. In op view, it means the length of operation list. @@ -68,7 +68,7 @@ output_bytes: The memory output by the operation. It's not necessarily requested by the current operation. For example, it can be a tensor forwarded from input to output, with in-place mutation. -###Docs +### Docs `-max_depth`: Show nodes that are at most this number of hops from starting node in the data structure. diff --git a/tensorflow/core/profiler/g3doc/profile_memory.md b/tensorflow/core/profiler/g3doc/profile_memory.md index a00683d062..6eda5abdd9 100644 --- a/tensorflow/core/profiler/g3doc/profile_memory.md +++ b/tensorflow/core/profiler/g3doc/profile_memory.md @@ -1,4 +1,4 @@ -##Profile Memory +## Profile Memory It is generally a good idea to visualize the memory usage in timeline. It allows you to see the memory consumption of each GPU over time. diff --git a/tensorflow/core/profiler/g3doc/profile_model_architecture.md b/tensorflow/core/profiler/g3doc/profile_model_architecture.md index a42b2e918d..61bb66bd21 100644 --- a/tensorflow/core/profiler/g3doc/profile_model_architecture.md +++ b/tensorflow/core/profiler/g3doc/profile_model_architecture.md @@ -1,9 +1,9 @@ -##Profile Model Architecture +## Profile Model Architecture * [Profile Model Parameters](#profile-model-parameters) * [Profile Model Float Operations](#profile-model-float-operations) -###Profile Model Parameters +### Profile Model Parameters Notes: `VariableV2` operation type might contain variables created by TensorFlow @@ -39,9 +39,9 @@ param_stats = tf.profiler.profile( sys.stdout.write('total_params: %d\n' % param_stats.total_parameters) ``` -###Profile Model Float Operations +### Profile Model Float Operations -####Caveats +#### Caveats For an operation to have float operation statistics: diff --git a/tensorflow/core/profiler/g3doc/profile_time.md b/tensorflow/core/profiler/g3doc/profile_time.md index e11a75553b..4aafc697a9 100644 --- a/tensorflow/core/profiler/g3doc/profile_time.md +++ b/tensorflow/core/profiler/g3doc/profile_time.md @@ -1,4 +1,4 @@ -##Profile Time +## Profile Time * [Times in TensorFlow and tfprof](#times-in-tensorflow-and-tfprof) * [Profile by Python Code](#profile-by-python-code) @@ -7,7 +7,7 @@ * [Profile by Name Scope](#profile-by-name-scope) -###Times in TensorFlow and tfprof +### Times in TensorFlow and tfprof When we run a model, Tensorflow schedules and runs the nodes (operations) in the graph. An operation can be placed on an accelerator or on CPU. @@ -37,7 +37,7 @@ When an operation is placed on CPU, it will completely run on CPU. Hence, should be 0. -###Profile by Python Code +### Profile by Python Code ```python # In code view, the time of each line of Python code is the aggregated # times of all operations created by that line. @@ -112,7 +112,7 @@ Set ```-output timeline:outfile=``` to generate timeline instead of st -###Profile by Operation Type +### Profile by Operation Type ```python # In op view, you can view the aggregated time of each operation type. tfprof> op -select micros,occurrence -order_by micros @@ -138,7 +138,7 @@ MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/ ``` -###Profile by Graph +### Profile by Graph Usually, use graph view to generate a timeline to visualize the result. @@ -163,7 +163,7 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file. ****************************************************** ``` -###Profile by Name Scope +### Profile by Name Scope Usually scope view allows you to pin point the problematic places if you have properly named your operations with tf.name_scope or tf.variable_scope. diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 43e09906f7..d5e481520c 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -151,10 +151,10 @@ Take the following steps to install TensorFlow with Virtualenv: (tensorflow)$ pip install --upgrade tensorflow-gpu # for Python 2.7 and GPU (tensorflow)$ pip3 install --upgrade tensorflow-gpu # for Python 3.n and GPU - If the preceding command succeeds, skip Step 5. If the preceding - command fails, perform Step 5. + If the preceding command succeeds, skip Step 6. If the preceding + command fails, perform Step 6. - 5. (Optional) If Step 4 failed (typically because you invoked a pip version + 6. (Optional) If Step 5 failed (typically because you invoked a pip version lower than 8.1), install TensorFlow in the active virtualenv environment by issuing a command of the following format: diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index be6a490ff9..3025c9971a 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -71,12 +71,14 @@ Use that package at your own risk. ## Installing with native pip -If the following version of Python is not installed on your machine, +If one of the following versions of Python is not installed on your machine, install it now: * [Python 3.5.x 64-bit from python.org](https://www.python.org/downloads/release/python-352/) + * [Python 3.6.x 64-bit from python.org](https://www.python.org/downloads/release/python-362/) -Note that Python 3.5.x comes with the pip3 package manager, which is the +-TensorFlow supports Python 3.5.x and 3.6.x on Windows. +Note that Python 3 comes with the pip3 package manager, which is the program you'll use to install TensorFlow. To install TensorFlow, start a terminal. Then issue the appropriate diff --git a/tensorflow/examples/speech_commands/README.md b/tensorflow/examples/speech_commands/README.md index 3b78210129..63be04ee58 100644 --- a/tensorflow/examples/speech_commands/README.md +++ b/tensorflow/examples/speech_commands/README.md @@ -1,4 +1,4 @@ # Speech Commands Example This is a basic speech recognition example. For more information, see the -tutorial at http://tensorflow.org/tutorials/audio_recognition. +tutorial at https://www.tensorflow.org/versions/master/tutorials/audio_recognition. diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 44ab1a622e..a8434d0c99 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -2473,7 +2473,7 @@ class _IndicatorColumn(_DenseColumn, weighted_column = sparse_ops.sparse_merge( sp_ids=id_tensor, sp_values=weight_tensor, - vocab_size=self._variable_shape[-1]) + vocab_size=int(self._variable_shape[-1])) return sparse_ops.sparse_tensor_to_dense(weighted_column) dense_id_tensor = sparse_ops.sparse_tensor_to_dense( diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index b14ec73ba2..3057776391 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -3206,6 +3206,20 @@ class IndicatorColumnTest(test.TestCase): with _initialized_session(): self.assertAllEqual([[0, 0, 1], [1, 0, 0]], indicator_tensor.eval()) + def test_transform_with_weighted_column(self): + # Github issue 12557 + ids = fc.categorical_column_with_vocabulary_list( + key='ids', vocabulary_list=('a', 'b', 'c')) + weights = fc.weighted_categorical_column(ids, 'weights') + indicator = fc.indicator_column(weights) + features = { + 'ids': constant_op.constant(['c', 'b', 'a'], shape=(1, 3)), + 'weights': constant_op.constant([2., 4., 6.], shape=(1, 3)) + } + indicator_tensor = _transform_features(features, [indicator])[indicator] + with _initialized_session(): + self.assertAllEqual([[6., 4., 2.]], indicator_tensor.eval()) + def test_linear_model(self): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h index 8e3155475c..f2b35bcb43 100644 --- a/tensorflow/stream_executor/device_description.h +++ b/tensorflow/stream_executor/device_description.h @@ -82,7 +82,7 @@ class DeviceDescription { // Returns the limit on the number of simultaneously resident blocks // on a multiprocessor. - const uint64 blocks_per_core_limit() const { return blocks_per_core_limit_; } + uint64 blocks_per_core_limit() const { return blocks_per_core_limit_; } // Returns the limit on the total number of threads that can be launched in a // single block; i.e. the limit on x * y * z dimensions of a ThreadDim. @@ -141,7 +141,7 @@ class DeviceDescription { uint64 device_memory_size() const { return device_memory_size_; } // Returns the device's core clock rate in GHz. - const float clock_rate_ghz() const { return clock_rate_ghz_; } + float clock_rate_ghz() const { return clock_rate_ghz_; } // Returns whether ECC is enabled. bool ecc_enabled() const { return ecc_enabled_; } diff --git a/tensorflow/stream_executor/kernel.h b/tensorflow/stream_executor/kernel.h index d9d40d77bd..8ef091f929 100644 --- a/tensorflow/stream_executor/kernel.h +++ b/tensorflow/stream_executor/kernel.h @@ -302,7 +302,7 @@ class KernelArgIterator { // // Returns a default-constructed KernelArg if there is no next argument. KernelArg next() { - KernelArg result; + KernelArg result = {}; if (!has_next()) { return result; } else if ((shmem_indices_iter_ != shmem_indices_end_) && diff --git a/tensorflow/tools/ci_build/update_version.py b/tensorflow/tools/ci_build/update_version.py index e525e11397..4405678a6b 100755 --- a/tensorflow/tools/ci_build/update_version.py +++ b/tensorflow/tools/ci_build/update_version.py @@ -276,8 +276,9 @@ def check_for_lingering_string(lingering_string): """Check for given lingering strings.""" formatted_string = lingering_string.replace(".", r"\.") try: - linger_strs = subprocess.check_output( - ['grep', '-rnoH', formatted_string, TF_SRC_DIR]).split("\n") + linger_str_output = subprocess.check_output( + ["grep", "-rnoH", formatted_string, TF_SRC_DIR]) + linger_strs = linger_str_output.decode("utf8").split("\n") except subprocess.CalledProcessError: linger_strs = [] diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index bb6334942f..d62316964f 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -84,6 +84,7 @@ py_binary( "//tensorflow/python/saved_model", "//tensorflow/python:spectral_ops_test_util", "//tensorflow/python/tools:tools_pip", + "//tensorflow/python/eager:eager_pip", # These targets don't build on Windows yet. Exclude them for now. # "//tensorflow/contrib/ndlstm", # "//tensorflow/contrib/slim", diff --git a/third_party/sqlite.BUILD b/third_party/sqlite.BUILD index f593b71542..9840d7b151 100644 --- a/third_party/sqlite.BUILD +++ b/third_party/sqlite.BUILD @@ -2,9 +2,9 @@ # Sqlite3 library. Provides utilities for interacting # with sqlite3 databases. -licenses(["notice"]) # BSD/MIT-like license +licenses(["unencumbered"]) # Public Domain -exports_files(["LICENSE"]) +# exports_files(["LICENSE"]) cc_library( name = "sqlite", -- GitLab From 182ef4543f119201043fc173a0f61938d751325d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 23 Aug 2017 09:41:50 +0800 Subject: [PATCH 200/294] BUG: make Dimension compatible with integer --- tensorflow/python/util/compat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/util/compat.py b/tensorflow/python/util/compat.py index 07382d93df..eec3b86992 100644 --- a/tensorflow/python/util/compat.py +++ b/tensorflow/python/util/compat.py @@ -41,6 +41,7 @@ import numpy as _np import six as _six from tensorflow.python.util.all_util import remove_undocumented +from tensorflow.python.framework.tensor_shape import Dimension def as_bytes(bytes_or_text, encoding='utf-8'): @@ -110,7 +111,7 @@ def as_str_any(value): # Numpy 1.8 scalars don't inherit from numbers.Integral in Python 3, so we # need to check them specifically. The same goes from Real and Complex. -integral_types = (_numbers.Integral, _np.integer) +integral_types = (_numbers.Integral, _np.integer, Dimension) real_types = (_numbers.Real, _np.integer, _np.floating) complex_types = (_numbers.Complex, _np.number) -- GitLab From 913887651056ec4ce26344c1421388b294304af7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 23 Aug 2017 09:56:06 +0800 Subject: [PATCH 201/294] TST: add doctest --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 13356e1d8a..0884c61cc3 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -20018,7 +20018,7 @@ op { } } summary: "Reshapes a tensor." - description: "Given `tensor`, this operation returns a tensor that has the same values\nas `tensor` with shape `shape`.\n\nIf one component of `shape` is the special value -1, the size of that dimension\nis computed so that the total size remains constant. In particular, a `shape`\nof `[-1]` flattens into 1-D. At most one component of `shape` can be -1.\n\nIf `shape` is 1-D or higher, then the operation returns a tensor with shape\n`shape` filled with the values of `tensor`. In this case, the number of elements\nimplied by `shape` must be the same as the number of elements in `tensor`.\n\nFor example:\n\n```\n# tensor \'t\' is [1, 2, 3, 4, 5, 6, 7, 8, 9]\n# tensor \'t\' has shape [9]\nreshape(t, [3, 3]) ==> [[1, 2, 3],\n [4, 5, 6],\n [7, 8, 9]]\n\n# tensor \'t\' is [[[1, 1], [2, 2]],\n# [[3, 3], [4, 4]]]\n# tensor \'t\' has shape [2, 2, 2]\nreshape(t, [2, 4]) ==> [[1, 1, 2, 2],\n [3, 3, 4, 4]]\n\n# tensor \'t\' is [[[1, 1, 1],\n# [2, 2, 2]],\n# [[3, 3, 3],\n# [4, 4, 4]],\n# [[5, 5, 5],\n# [6, 6, 6]]]\n# tensor \'t\' has shape [3, 2, 3]\n# pass \'[-1]\' to flatten \'t\'\nreshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6]\n\n# -1 can also be used to infer the shape\n\n# -1 is inferred to be 9:\nreshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 2:\nreshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 3:\nreshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1],\n [2, 2, 2],\n [3, 3, 3]],\n [[4, 4, 4],\n [5, 5, 5],\n [6, 6, 6]]]\n\n# tensor \'t\' is [7]\n# shape `[]` reshapes to a scalar\nreshape(t, []) ==> 7\n```" + description: "Given `tensor`, this operation returns a tensor that has the same values\nas `tensor` with shape `shape`.\n\nIf one component of `shape` is the special value -1, the size of that dimension\nis computed so that the total size remains constant. In particular, a `shape`\nof `[-1]` flattens into 1-D. At most one component of `shape` can be -1.\n\nIf `shape` is 1-D or higher, then the operation returns a tensor with shape\n`shape` filled with the values of `tensor`. In this case, the number of elements\nimplied by `shape` must be the same as the number of elements in `tensor`.\n\nFor example:\n\n```\n# tensor \'t\' is [1, 2, 3, 4, 5, 6, 7, 8, 9]\n# tensor \'t\' has shape [9]\nreshape(t, [3, 3]) ==> [[1, 2, 3],\n [4, 5, 6],\n [7, 8, 9]]\n\n# tensor \'t\' is [[[1, 1], [2, 2]],\n# [[3, 3], [4, 4]]]\n# tensor \'t\' has shape [2, 2, 2]\nreshape(t, [2, 4]) ==> [[1, 1, 2, 2],\n [3, 3, 4, 4]]\n\n# tensor \'t\' is [[[1, 1, 1],\n# [2, 2, 2]],\n# [[3, 3, 3],\n# [4, 4, 4]],\n# [[5, 5, 5],\n# [6, 6, 6]]]\n# tensor \'t\' has shape [3, 2, 3]\n# pass \'[-1]\' to flatten \'t\'\nreshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6]\n\n# Dimension can be used\n reshape(t, [3, tf.Dimension(3)]) ==> [[1, 2, 3],\n [4, 5, 6],\n [7, 8, 9]]\n\n# -1 can also be used to infer the shape\n\n# -1 is inferred to be 9:\nreshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 2:\nreshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 3:\nreshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1],\n [2, 2, 2],\n [3, 3, 3]],\n [[4, 4, 4],\n [5, 5, 5],\n [6, 6, 6]]]\n\n# tensor \'t\' is [7]\n# shape `[]` reshapes to a scalar\nreshape(t, []) ==> 7\n```" } op { name: "ResizeArea" -- GitLab From 0563e1f6bf0efe6f81df00013fd28d4631485cfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 23 Aug 2017 10:07:50 +0800 Subject: [PATCH 202/294] BUG: resolve circle import --- tensorflow/python/framework/tensor_shape.py | 5 ++++- tensorflow/python/util/compat.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index 54ec15ea66..828b61e457 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function from tensorflow.core.framework import tensor_shape_pb2 -from tensorflow.python.util import compat class Dimension(object): @@ -26,6 +25,8 @@ class Dimension(object): def __init__(self, value): """Creates a new Dimension with the given value.""" + from tensorflow.python.util import compat + if value is None: self._value = None else: @@ -428,6 +429,8 @@ class TensorShape(object): TypeError: If dims cannot be converted to a list of dimensions. """ # TODO(irving): Eliminate the single integer special case. + from tensorflow.python.util import compat + if dims is None: self._dims = None elif isinstance(dims, compat.bytes_or_text_types): diff --git a/tensorflow/python/util/compat.py b/tensorflow/python/util/compat.py index eec3b86992..cae41a7230 100644 --- a/tensorflow/python/util/compat.py +++ b/tensorflow/python/util/compat.py @@ -40,8 +40,8 @@ import numbers as _numbers import numpy as _np import six as _six +from tensorflow.python.framework import tensor_shape from tensorflow.python.util.all_util import remove_undocumented -from tensorflow.python.framework.tensor_shape import Dimension def as_bytes(bytes_or_text, encoding='utf-8'): @@ -111,7 +111,7 @@ def as_str_any(value): # Numpy 1.8 scalars don't inherit from numbers.Integral in Python 3, so we # need to check them specifically. The same goes from Real and Complex. -integral_types = (_numbers.Integral, _np.integer, Dimension) +integral_types = (_numbers.Integral, _np.integer, tensor_shape.Dimension) real_types = (_numbers.Real, _np.integer, _np.floating) complex_types = (_numbers.Complex, _np.number) -- GitLab From 0c9ddf3ffd78196cb579d040c59a72d604152073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 23 Aug 2017 13:42:34 +0800 Subject: [PATCH 203/294] TST: add unit test --- tensorflow/python/framework/tensor_util_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py index 2760f98a6b..fee78d50f6 100644 --- a/tensorflow/python/framework/tensor_util_test.py +++ b/tensorflow/python/framework/tensor_util_test.py @@ -314,6 +314,16 @@ class TensorUtilTest(test.TestCase): shape=[3, 4], dtype=dtype))) + def testIntMixedWithDimension(self): + dtype = dtypes.int32 + nptype = np.int32 + t = tensor_util.make_tensor_proto([10, tensor_shape.Dimension(20), 30], + dtype=dtype) + self.assertEquals(dtype, t.dtype) + a = tensor_util.MakeNdarray(t) + self.assertEquals(nptype, a.dtype) + self.assertAllClose(np.array([10, 20, 30], dtype=nptype), a) + def testLong(self): t = tensor_util.make_tensor_proto(10, dtype=dtypes.int64) self.assertProtoEquals(""" -- GitLab From 075b4374392ff963f29a861c3e0d683fbe284d79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 23 Aug 2017 13:47:07 +0800 Subject: [PATCH 204/294] TST: add issue id --- tensorflow/python/framework/tensor_util_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py index fee78d50f6..ca47274e9a 100644 --- a/tensorflow/python/framework/tensor_util_test.py +++ b/tensorflow/python/framework/tensor_util_test.py @@ -315,6 +315,7 @@ class TensorUtilTest(test.TestCase): dtype=dtype))) def testIntMixedWithDimension(self): + # Github issue: 11974 dtype = dtypes.int32 nptype = np.int32 t = tensor_util.make_tensor_proto([10, tensor_shape.Dimension(20), 30], -- GitLab From 200dc87de38f14af9ea547418a57cab2ba828a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 26 Aug 2017 06:24:28 +0800 Subject: [PATCH 205/294] CLN: put back imports on the top --- tensorflow/python/framework/tensor_shape.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index 828b61e457..54ec15ea66 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function from tensorflow.core.framework import tensor_shape_pb2 +from tensorflow.python.util import compat class Dimension(object): @@ -25,8 +26,6 @@ class Dimension(object): def __init__(self, value): """Creates a new Dimension with the given value.""" - from tensorflow.python.util import compat - if value is None: self._value = None else: @@ -429,8 +428,6 @@ class TensorShape(object): TypeError: If dims cannot be converted to a list of dimensions. """ # TODO(irving): Eliminate the single integer special case. - from tensorflow.python.util import compat - if dims is None: self._dims = None elif isinstance(dims, compat.bytes_or_text_types): -- GitLab From 52e43b4fcdf66ef74aa3b77d0f353b631252f534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 26 Aug 2017 06:33:27 +0800 Subject: [PATCH 206/294] ENH: check dim in _FilterInt --- tensorflow/python/framework/tensor_util.py | 4 +++- tensorflow/python/util/compat.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index eea3d28a7e..d2d95a5f9d 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -236,7 +236,9 @@ def _FilterTuple(v): def _FilterInt(v): if isinstance(v, (list, tuple)): return _FirstNotNone([_FilterInt(x) for x in v]) - return None if isinstance(v, compat.integral_types) else _NotNone(v) + return None if isinstance( + v, + compat.integral_types, tensor_shape.Dimension) else _NotNone(v) def _FilterFloat(v): diff --git a/tensorflow/python/util/compat.py b/tensorflow/python/util/compat.py index cae41a7230..07382d93df 100644 --- a/tensorflow/python/util/compat.py +++ b/tensorflow/python/util/compat.py @@ -40,7 +40,6 @@ import numbers as _numbers import numpy as _np import six as _six -from tensorflow.python.framework import tensor_shape from tensorflow.python.util.all_util import remove_undocumented @@ -111,7 +110,7 @@ def as_str_any(value): # Numpy 1.8 scalars don't inherit from numbers.Integral in Python 3, so we # need to check them specifically. The same goes from Real and Complex. -integral_types = (_numbers.Integral, _np.integer, tensor_shape.Dimension) +integral_types = (_numbers.Integral, _np.integer) real_types = (_numbers.Real, _np.integer, _np.floating) complex_types = (_numbers.Complex, _np.number) -- GitLab From c24535359bb3945420cec6885b6181688a4cbb66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 26 Aug 2017 06:56:32 +0800 Subject: [PATCH 207/294] Revert "TST: add doctest" This reverts commit 689174abb6ddb0bdba7b4e3794a246f1e385a0fb. --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 0884c61cc3..13356e1d8a 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -20018,7 +20018,7 @@ op { } } summary: "Reshapes a tensor." - description: "Given `tensor`, this operation returns a tensor that has the same values\nas `tensor` with shape `shape`.\n\nIf one component of `shape` is the special value -1, the size of that dimension\nis computed so that the total size remains constant. In particular, a `shape`\nof `[-1]` flattens into 1-D. At most one component of `shape` can be -1.\n\nIf `shape` is 1-D or higher, then the operation returns a tensor with shape\n`shape` filled with the values of `tensor`. In this case, the number of elements\nimplied by `shape` must be the same as the number of elements in `tensor`.\n\nFor example:\n\n```\n# tensor \'t\' is [1, 2, 3, 4, 5, 6, 7, 8, 9]\n# tensor \'t\' has shape [9]\nreshape(t, [3, 3]) ==> [[1, 2, 3],\n [4, 5, 6],\n [7, 8, 9]]\n\n# tensor \'t\' is [[[1, 1], [2, 2]],\n# [[3, 3], [4, 4]]]\n# tensor \'t\' has shape [2, 2, 2]\nreshape(t, [2, 4]) ==> [[1, 1, 2, 2],\n [3, 3, 4, 4]]\n\n# tensor \'t\' is [[[1, 1, 1],\n# [2, 2, 2]],\n# [[3, 3, 3],\n# [4, 4, 4]],\n# [[5, 5, 5],\n# [6, 6, 6]]]\n# tensor \'t\' has shape [3, 2, 3]\n# pass \'[-1]\' to flatten \'t\'\nreshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6]\n\n# Dimension can be used\n reshape(t, [3, tf.Dimension(3)]) ==> [[1, 2, 3],\n [4, 5, 6],\n [7, 8, 9]]\n\n# -1 can also be used to infer the shape\n\n# -1 is inferred to be 9:\nreshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 2:\nreshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 3:\nreshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1],\n [2, 2, 2],\n [3, 3, 3]],\n [[4, 4, 4],\n [5, 5, 5],\n [6, 6, 6]]]\n\n# tensor \'t\' is [7]\n# shape `[]` reshapes to a scalar\nreshape(t, []) ==> 7\n```" + description: "Given `tensor`, this operation returns a tensor that has the same values\nas `tensor` with shape `shape`.\n\nIf one component of `shape` is the special value -1, the size of that dimension\nis computed so that the total size remains constant. In particular, a `shape`\nof `[-1]` flattens into 1-D. At most one component of `shape` can be -1.\n\nIf `shape` is 1-D or higher, then the operation returns a tensor with shape\n`shape` filled with the values of `tensor`. In this case, the number of elements\nimplied by `shape` must be the same as the number of elements in `tensor`.\n\nFor example:\n\n```\n# tensor \'t\' is [1, 2, 3, 4, 5, 6, 7, 8, 9]\n# tensor \'t\' has shape [9]\nreshape(t, [3, 3]) ==> [[1, 2, 3],\n [4, 5, 6],\n [7, 8, 9]]\n\n# tensor \'t\' is [[[1, 1], [2, 2]],\n# [[3, 3], [4, 4]]]\n# tensor \'t\' has shape [2, 2, 2]\nreshape(t, [2, 4]) ==> [[1, 1, 2, 2],\n [3, 3, 4, 4]]\n\n# tensor \'t\' is [[[1, 1, 1],\n# [2, 2, 2]],\n# [[3, 3, 3],\n# [4, 4, 4]],\n# [[5, 5, 5],\n# [6, 6, 6]]]\n# tensor \'t\' has shape [3, 2, 3]\n# pass \'[-1]\' to flatten \'t\'\nreshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6]\n\n# -1 can also be used to infer the shape\n\n# -1 is inferred to be 9:\nreshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 2:\nreshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3],\n [4, 4, 4, 5, 5, 5, 6, 6, 6]]\n# -1 is inferred to be 3:\nreshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1],\n [2, 2, 2],\n [3, 3, 3]],\n [[4, 4, 4],\n [5, 5, 5],\n [6, 6, 6]]]\n\n# tensor \'t\' is [7]\n# shape `[]` reshapes to a scalar\nreshape(t, []) ==> 7\n```" } op { name: "ResizeArea" -- GitLab From 211534feecd9127a7aaa6562757cb1881767415e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 26 Aug 2017 16:44:09 +0800 Subject: [PATCH 208/294] BUG: classinfo should be tuple --- tensorflow/python/framework/tensor_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index d2d95a5f9d..8c0975b11b 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -238,7 +238,7 @@ def _FilterInt(v): return _FirstNotNone([_FilterInt(x) for x in v]) return None if isinstance( v, - compat.integral_types, tensor_shape.Dimension) else _NotNone(v) + (compat.integral_types, tensor_shape.Dimension)) else _NotNone(v) def _FilterFloat(v): -- GitLab From 3e0e5841e807f008e310a1c36cbcb8a30f4c2baa Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Sun, 3 Sep 2017 14:06:47 -0700 Subject: [PATCH 209/294] Make matrix_solve_ls_test medium Times out, occasionally. --- tensorflow/python/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index ed89a6dc48..e432998c21 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -518,7 +518,7 @@ tf_py_test( tf_py_test( name = "matrix_solve_ls_op_test", - size = "small", + size = "medium", srcs = ["matrix_solve_ls_op_test.py"], additional_deps = [ "//third_party/py/numpy", -- GitLab From 5598a3dec75cd9fddf66bdf8889e3b816e492e2c Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Sun, 27 Aug 2017 00:10:21 -0400 Subject: [PATCH 210/294] Eager execution API bug fix for tensor data types whose TF_Tensor memory layout is not the same at the tensorflow::Tensor memory layout. --- tensorflow/c/c_api_internal.h | 2 ++ tensorflow/c/eager/c_api.cc | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h index 6e44a72e2b..68c324f2b9 100644 --- a/tensorflow/c/c_api_internal.h +++ b/tensorflow/c/c_api_internal.h @@ -146,6 +146,8 @@ class TensorCApi { } }; +Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst); + TF_Tensor* TF_TensorFromTensor(const Tensor& src, TF_Status* status); Status MessageToBuffer(const tensorflow::protobuf::Message& in, TF_Buffer* out); diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 01e251a1ac..13a1825aae 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -152,9 +152,10 @@ TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) { } TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t) { - return new TFE_TensorHandle( - tensorflow::TensorCApi::MakeTensor(t->dtype, t->shape, t->buffer), - nullptr); + tensorflow::Tensor tensor; + // TODO: Add status argument and check on it. + tensorflow::TF_TensorToTensor(t, &tensor); + return new TFE_TensorHandle(tensor, nullptr); } void TFE_DeleteTensorHandle(TFE_TensorHandle* h) { delete h; } -- GitLab From 26c4a2c1ec5497227440c19177ae854861b17a31 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 28 Aug 2017 14:31:02 -0400 Subject: [PATCH 211/294] Added handling of the status flag when creating TFE tensors. --- tensorflow/c/eager/c_api.cc | 6 +++--- tensorflow/c/eager/c_api.h | 4 ++-- tensorflow/c/eager/c_api_test.cc | 7 +++++-- tensorflow/python/eager/pywrap_tfe_src.cc | 4 +++- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 13a1825aae..e70539ceef 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -151,10 +151,10 @@ TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) { return TF_SessionListDevices(ctx->session, status); } -TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t) { +TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { tensorflow::Tensor tensor; - // TODO: Add status argument and check on it. - tensorflow::TF_TensorToTensor(t, &tensor); + status->status = tensorflow::TF_TensorToTensor(t, &tensor); + if (!status->status.ok()) return nullptr; return new TFE_TensorHandle(tensor, nullptr); } diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 476c9288f8..24a80a8f5b 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -43,7 +43,7 @@ extern TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, // placed in memory of different devices or remote address spaces. typedef struct TFE_TensorHandle TFE_TensorHandle; -extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t); +extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status); extern void TFE_DeleteTensorHandle(TFE_TensorHandle* h); extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h); extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h); @@ -153,7 +153,7 @@ class Tensor; const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory( TFE_TensorHandle* h, TF_Status* status); -TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t); +TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t, TF_Status* status); #endif #endif // TENSORFLOW_C_EAGER_C_API_H_ diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 6f5c21c947..d19583a3ab 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -34,7 +34,9 @@ TFE_TensorHandle* TestMatrixTensorHandle() { TF_Tensor* t = TF_AllocateTensor( TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); - TFE_TensorHandle* th = TFE_NewTensorHandle(t); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TF_DeleteTensor(t); return th; } @@ -383,7 +385,8 @@ TFE_TensorHandle* CreateVariable(TFE_Context* ctx, float value, memcpy(TF_TensorData(t.get()), &value, TF_TensorByteSize(t.get())); std::unique_ptr - value_handle(TFE_NewTensorHandle(t.get()), TFE_DeleteTensorHandle); + value_handle(TFE_NewTensorHandle(t.get(), status), TFE_DeleteTensorHandle); + if (TF_GetCode(status) != TF_OK) return nullptr; TFE_OpAddInput(op, value_handle.get(), status); if (TF_GetCode(status) != TF_OK) return nullptr; diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 655e3ec849..8146d9de14 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -347,7 +347,9 @@ TFE_TensorHandle* TFE_Py_NumpyToTensorHandle(PyObject* obj) { tensorflow::Tensor t; auto cppstatus = tensorflow::NdarrayToTensor(obj, &t); if (cppstatus.ok()) { - return TFE_NewTensorHandle(t); + TFE_TensorHandle* tensor = TFE_NewTensorHandle(t, cppstatus); + if (!cppstatus.ok()) return nullptr; + return tensor; } else { tensorflow::mutex_lock l(exception_class_mutex); auto msg = tensorflow::strings::StrCat( -- GitLab From eaa69ba50b915f15ebe5ee76097b9f691a744617 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 28 Aug 2017 16:02:47 -0400 Subject: [PATCH 212/294] Fixed a bug. --- tensorflow/python/eager/pywrap_tfe_src.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 8146d9de14..311bbc9477 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -347,8 +347,13 @@ TFE_TensorHandle* TFE_Py_NumpyToTensorHandle(PyObject* obj) { tensorflow::Tensor t; auto cppstatus = tensorflow::NdarrayToTensor(obj, &t); if (cppstatus.ok()) { - TFE_TensorHandle* tensor = TFE_NewTensorHandle(t, cppstatus); - if (!cppstatus.ok()) return nullptr; + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* tensor = TFE_NewTensorHandle(t, status); + if (TF_GetCode(status) != TF_OK) { + TF_DeleteStatus(status); + return nullptr; + } + TF_DeleteStatus(status); return tensor; } else { tensorflow::mutex_lock l(exception_class_mutex); -- GitLab From 0d1f2d0bcc1687699e65688d6c1ab7efc24bfe52 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 28 Aug 2017 16:44:15 -0400 Subject: [PATCH 213/294] Reverted some of the changes. --- tensorflow/c/eager/c_api.cc | 9 ++++++++- tensorflow/c/eager/c_api.h | 2 +- tensorflow/python/eager/pywrap_tfe_src.cc | 9 +-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index e70539ceef..75271b6e82 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -537,7 +537,14 @@ void TFE_ContextAddFunctionDef(TFE_Context* ctx, } // extern "C" TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t) { - return new TFE_TensorHandle(t, nullptr); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* tensor = TFE_NewTensorHandle(t, status); + if (TF_GetCode(status) != TF_OK) { + TF_DeleteStatus(status); + return nullptr; + } + TF_DeleteStatus(status); + return tensor; } const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory( diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 24a80a8f5b..88a0dd343f 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -153,7 +153,7 @@ class Tensor; const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory( TFE_TensorHandle* h, TF_Status* status); -TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t, TF_Status* status); +TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t); #endif #endif // TENSORFLOW_C_EAGER_C_API_H_ diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 311bbc9477..655e3ec849 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -347,14 +347,7 @@ TFE_TensorHandle* TFE_Py_NumpyToTensorHandle(PyObject* obj) { tensorflow::Tensor t; auto cppstatus = tensorflow::NdarrayToTensor(obj, &t); if (cppstatus.ok()) { - TF_Status* status = TF_NewStatus(); - TFE_TensorHandle* tensor = TFE_NewTensorHandle(t, status); - if (TF_GetCode(status) != TF_OK) { - TF_DeleteStatus(status); - return nullptr; - } - TF_DeleteStatus(status); - return tensor; + return TFE_NewTensorHandle(t); } else { tensorflow::mutex_lock l(exception_class_mutex); auto msg = tensorflow::strings::StrCat( -- GitLab From bfdffb2adb9c75aa86ca822075f2d42fcd807f4f Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Mon, 28 Aug 2017 17:11:58 -0400 Subject: [PATCH 214/294] Bug fix. --- tensorflow/c/eager/c_api.cc | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 75271b6e82..e70539ceef 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -537,14 +537,7 @@ void TFE_ContextAddFunctionDef(TFE_Context* ctx, } // extern "C" TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t) { - TF_Status* status = TF_NewStatus(); - TFE_TensorHandle* tensor = TFE_NewTensorHandle(t, status); - if (TF_GetCode(status) != TF_OK) { - TF_DeleteStatus(status); - return nullptr; - } - TF_DeleteStatus(status); - return tensor; + return new TFE_TensorHandle(t, nullptr); } const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory( -- GitLab From b40716f7925ff4b1a1e2bca6e8951875b7b65ee8 Mon Sep 17 00:00:00 2001 From: Alexey Petrenko Date: Thu, 27 Jul 2017 18:31:21 +0300 Subject: [PATCH 215/294] Added SNAPPY support in CMake scripts --- tensorflow/contrib/cmake/CMakeLists.txt | 11 +++++ .../contrib/cmake/external/snappy.cmake | 47 +++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 tensorflow/contrib/cmake/external/snappy.cmake diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 422df3063e..95505944cf 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -33,6 +33,7 @@ option(tensorflow_BUILD_MORE_PYTHON_TESTS "Build more python unit tests for cont option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF) option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON) option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions") +option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" OFF) if (NOT WIN32) # Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option @@ -204,6 +205,16 @@ if(tensorflow_ENABLE_JEMALLOC_SUPPORT) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES jemalloc) include_directories(${jemalloc_INCLUDE_DIRS}) endif() +if(tensorflow_ENABLE_SNAPPY_SUPPORT) + if (WIN32) + include(snappy) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${snappy_STATIC_LIBRARIES}) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES snappy) + include_directories(${snappy_INCLUDE_DIR}) + else() + message(FATAL_ERROR "CMake build with SNAPPY is only supported on Windows at the moment") + endif() +endif() if(WIN32) list(APPEND tensorflow_EXTERNAL_LIBRARIES wsock32 ws2_32 shlwapi) endif() diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake new file mode 100644 index 0000000000..e4303131e2 --- /dev/null +++ b/tensorflow/contrib/cmake/external/snappy.cmake @@ -0,0 +1,47 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +include (ExternalProject) + +set(snappy_URL https://github.com/google/snappy.git) +set(snappy_TAG "1.1.6") +set(snappy_BUILD ${CMAKE_CURRENT_BINARY_DIR}/snappy/src/snappy) +set(snappy_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/snappy/src/snappy) + +if(WIN32) + set(snappy_STATIC_LIBRARIES ${snappy_BUILD}/$(Configuration)/snappy.lib) +endif() + +set(snappy_HEADERS + "${snappy_INCLUDE_DIR}/snappy.h" +) + +ExternalProject_Add(snappy + PREFIX snappy + GIT_REPOSITORY ${snappy_URL} + GIT_TAG ${snappy_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + INSTALL_COMMAND "" + LOG_DOWNLOAD ON + LOG_CONFIGURE ON + LOG_BUILD ON + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON +) + +# actually enables snappy in the source code +add_definitions(-DSNAPPY) \ No newline at end of file -- GitLab From 3dd53c2c8cecd39706de6ad862379e5cefb30e94 Mon Sep 17 00:00:00 2001 From: Alexey Petrenko Date: Thu, 17 Aug 2017 16:04:29 +0300 Subject: [PATCH 216/294] Enabled SNAPPY in CMake by default, added Linux support --- tensorflow/contrib/cmake/CMakeLists.txt | 14 +++++--------- tensorflow/contrib/cmake/external/snappy.cmake | 4 +++- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 95505944cf..76cf0ad93b 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -33,7 +33,7 @@ option(tensorflow_BUILD_MORE_PYTHON_TESTS "Build more python unit tests for cont option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF) option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON) option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions") -option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" OFF) +option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON) if (NOT WIN32) # Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option @@ -206,14 +206,10 @@ if(tensorflow_ENABLE_JEMALLOC_SUPPORT) include_directories(${jemalloc_INCLUDE_DIRS}) endif() if(tensorflow_ENABLE_SNAPPY_SUPPORT) - if (WIN32) - include(snappy) - list(APPEND tensorflow_EXTERNAL_LIBRARIES ${snappy_STATIC_LIBRARIES}) - list(APPEND tensorflow_EXTERNAL_DEPENDENCIES snappy) - include_directories(${snappy_INCLUDE_DIR}) - else() - message(FATAL_ERROR "CMake build with SNAPPY is only supported on Windows at the moment") - endif() + include(snappy) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${snappy_LIBRARIES}) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES snappy) + include_directories(${snappy_INCLUDE_DIR}) endif() if(WIN32) list(APPEND tensorflow_EXTERNAL_LIBRARIES wsock32 ws2_32 shlwapi) diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake index e4303131e2..e8da726a95 100644 --- a/tensorflow/contrib/cmake/external/snappy.cmake +++ b/tensorflow/contrib/cmake/external/snappy.cmake @@ -20,7 +20,9 @@ set(snappy_BUILD ${CMAKE_CURRENT_BINARY_DIR}/snappy/src/snappy) set(snappy_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/snappy/src/snappy) if(WIN32) - set(snappy_STATIC_LIBRARIES ${snappy_BUILD}/$(Configuration)/snappy.lib) + set(snappy_LIBRARIES ${snappy_BUILD}/$(Configuration)/snappy.lib) +else() + set(snappy_LIBRARIES ${snappy_BUILD}/libsnappy.so) endif() set(snappy_HEADERS -- GitLab From fad50ea1ca3dca84a6735c80048a4262aea7fe64 Mon Sep 17 00:00:00 2001 From: Alexey Petrenko Date: Thu, 24 Aug 2017 15:19:09 +0300 Subject: [PATCH 217/294] Use latest version of Snappy instead of 1.1.6. Build Snappy as a static library rather than shared. --- tensorflow/contrib/cmake/CMakeLists.txt | 2 +- tensorflow/contrib/cmake/external/snappy.cmake | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 76cf0ad93b..c249a28556 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -207,7 +207,7 @@ if(tensorflow_ENABLE_JEMALLOC_SUPPORT) endif() if(tensorflow_ENABLE_SNAPPY_SUPPORT) include(snappy) - list(APPEND tensorflow_EXTERNAL_LIBRARIES ${snappy_LIBRARIES}) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${snappy_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES snappy) include_directories(${snappy_INCLUDE_DIR}) endif() diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake index e8da726a95..a35d8654fb 100644 --- a/tensorflow/contrib/cmake/external/snappy.cmake +++ b/tensorflow/contrib/cmake/external/snappy.cmake @@ -15,14 +15,14 @@ include (ExternalProject) set(snappy_URL https://github.com/google/snappy.git) -set(snappy_TAG "1.1.6") +set(snappy_TAG "55924d11095df25ab25c405fadfe93d0a46f82eb") set(snappy_BUILD ${CMAKE_CURRENT_BINARY_DIR}/snappy/src/snappy) set(snappy_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/snappy/src/snappy) if(WIN32) - set(snappy_LIBRARIES ${snappy_BUILD}/$(Configuration)/snappy.lib) + set(snappy_STATIC_LIBRARIES ${snappy_BUILD}/$(Configuration)/snappy.lib) else() - set(snappy_LIBRARIES ${snappy_BUILD}/libsnappy.so) + set(snappy_STATIC_LIBRARIES ${snappy_BUILD}/libsnappy.a) endif() set(snappy_HEADERS @@ -42,7 +42,8 @@ ExternalProject_Add(snappy CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DSNAPPY_BUILD_TESTS:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ) # actually enables snappy in the source code -- GitLab From 621c2dcf27fdf088bc46cd09d1ffd2d2f40cf7c6 Mon Sep 17 00:00:00 2001 From: "karl@kubx.ca" Date: Fri, 28 Jul 2017 18:33:03 -0400 Subject: [PATCH 218/294] Build rules and skeleton for Java operation wrappers generator --- tensorflow/java/BUILD | 46 +++++++++++++++- tensorflow/java/src/gen/cc/gen_util.h | 46 ++++++++++++++++ tensorflow/java/src/gen/cc/ops/main.cc | 33 ++++++++++++ .../java/src/gen/cc/ops/op_generator.cc | 52 +++++++++++++++++++ tensorflow/java/src/gen/cc/ops/op_generator.h | 37 +++++++++++++ tensorflow/tensorflow.bzl | 51 ++++++++++++++++++ 6 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 tensorflow/java/src/gen/cc/gen_util.h create mode 100644 tensorflow/java/src/gen/cc/ops/main.cc create mode 100644 tensorflow/java/src/gen/cc/ops/op_generator.cc create mode 100644 tensorflow/java/src/gen/cc/ops/op_generator.h diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 64b3767735..50dca9270c 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -6,6 +6,7 @@ package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # Apache 2.0 load("build_defs", "JAVACOPTS") +load("//tensorflow:tensorflow.bzl", "tf_copts", "tf_gen_op_wrappers_java") java_library( name = "tensorflow", @@ -34,12 +35,38 @@ filegroup( filegroup( name = "java_op_sources", - srcs = glob(["src/main/java/org/tensorflow/op/**/*.java"]), + srcs = glob(["src/main/java/org/tensorflow/op/**/*.java"]) + [ + ":java_op_gensources", + ], visibility = [ "//tensorflow/java:__pkg__", ], ) +tf_gen_op_wrappers_java( + name = "java_op_gensources", + ops_libs = [ + "array_ops", + "candidate_sampling_ops", + "control_flow_ops", + "data_flow_ops", + "image_ops", + "io_ops", + "linalg_ops", + "logging_ops", + "math_ops", + "nn_ops", + "no_op", + "parsing_ops", + "random_ops", + "sparse_ops", + "state_ops", + "string_ops", + "training_ops", + "user_ops", + ], +) + java_library( name = "testutil", testonly = 1, @@ -282,3 +309,20 @@ filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) + +cc_library( + name = "java_ops_gentool", + srcs = glob([ + "src/gen/cc/gen_util.h", + "src/gen/cc/ops/*.h", + "src/gen/cc/ops/*.cc", + ]), + copts = tf_copts(), + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:proto_text", + "//tensorflow/core:protos_all_cc", + ], +) diff --git a/tensorflow/java/src/gen/cc/gen_util.h b/tensorflow/java/src/gen/cc/gen_util.h new file mode 100644 index 0000000000..b6c419dcf8 --- /dev/null +++ b/tensorflow/java/src/gen/cc/gen_util.h @@ -0,0 +1,46 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_GEN_UTIL_H_ +#define TENSORFLOW_JAVA_SRC_GEN_CC_GEN_UTIL_H_ + +#include + +namespace tensorflow { +namespace gen_util { + +inline std::string StripChar(const std::string& str, const char old_char) { + std::string ret; + for (const char& c : str) { + if (c != old_char) { + ret.push_back(c); + } + } + return ret; +} + +inline std::string ReplaceChar(const std::string& str, const char old_char, + const char new_char) { + std::string ret; + for (const char& c : str) { + ret.push_back(c == old_char ? new_char : c); + } + return ret; +} + +} // namespace gen_util +} // namespace tensorflow + +#endif // TENSORFLOW_JAVA_SRC_GEN_CC_GEN_UTIL_H_ diff --git a/tensorflow/java/src/gen/cc/ops/main.cc b/tensorflow/java/src/gen/cc/ops/main.cc new file mode 100644 index 0000000000..eff93e9ee5 --- /dev/null +++ b/tensorflow/java/src/gen/cc/ops/main.cc @@ -0,0 +1,33 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + ==============================================================================*/ + +#include + +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/java/src/gen/cc/ops/op_generator.h" + +int main(int argc, char* argv[]) { + tensorflow::port::InitMain(argv[0], &argc, &argv); + const std::string& output_dir = argv[1]; + const std::string& ops_lib = argv[2]; + + LOG(INFO) << "Generating Java operation wrappers for \"" + << ops_lib << "\" library"; + + tensorflow::OpGenerator generator(ops_lib); + + return generator.Run(output_dir + "/src/main/java/"); +} diff --git a/tensorflow/java/src/gen/cc/ops/op_generator.cc b/tensorflow/java/src/gen/cc/ops/op_generator.cc new file mode 100644 index 0000000000..34b97948af --- /dev/null +++ b/tensorflow/java/src/gen/cc/ops/op_generator.cc @@ -0,0 +1,52 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/java/src/gen/cc/gen_util.h" +#include "tensorflow/java/src/gen/cc/ops/op_generator.h" + +namespace tensorflow { + +OpGenerator::OpGenerator(const std::string& ops_lib) + : ops_lib(ops_lib) { + + const std::string& lib_name = ops_lib.substr(0, ops_lib.find_last_of('_')); + package_name = gen_util::StripChar("org.tensorflow.op." + lib_name, '_'); +} + +OpGenerator::~OpGenerator() {} + +int OpGenerator::Run(const std::string& output_path) { + tensorflow::Env* env = tensorflow::Env::Default(); + std::string package_path(output_path); + package_path += gen_util::ReplaceChar(package_name, '.', '/'); + + if (!env->FileExists(package_path).ok()) { + TF_CHECK_OK(env->RecursivelyCreateDir(package_path)); + } + + OpList ops; + OpRegistry::Global()->Export(true, &ops); + + // TODO(karllessard) generate wrappers from collected ops + + return 0; +} + +} // namespace tensorflow diff --git a/tensorflow/java/src/gen/cc/ops/op_generator.h b/tensorflow/java/src/gen/cc/ops/op_generator.h new file mode 100644 index 0000000000..fc52d1ac18 --- /dev/null +++ b/tensorflow/java/src/gen/cc/ops/op_generator.h @@ -0,0 +1,37 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_OPS_OP_GENERATOR_H_ +#define TENSORFLOW_JAVA_SRC_GEN_CC_OPS_OP_GENERATOR_H_ + +#include + +namespace tensorflow { + +class OpGenerator { + public: + explicit OpGenerator(const std::string& ops_lib); + virtual ~OpGenerator(); + + int Run(const std::string& output_path); + + private: + const std::string ops_lib; + std::string package_name; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_JAVA_SRC_GEN_CC_OPS_OP_GENERATOR_H_ diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index f0301937fb..efb2d9a494 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -430,6 +430,57 @@ def tf_gen_op_wrapper_py(name, ],) +# Given a list of "ops_libs" (a list of files in the core/ops directory +# without their .cc extensions), generate Java wrapper code for all operations +# found in the ops files. +# Then, combine all those source files into a single archive (.srcjar). +# +# For example: +# tf_gen_op_wrappers_java("java_op_gensources", [ "array_ops", "math_ops" ]) +# +# will create a genrule named "java_op_gensources" that first generate source files: +# ops/src/main/java/org/tensorflow/op/array/*.java +# ops/src/main/java/org/tensorflow/op/math/*.java +# +# and then archive those source files in: +# ops/java_op_gensources.srcjar +# +def tf_gen_op_wrappers_java(name, + ops_libs=[], + ops_libs_pkg="//tensorflow/core", + out_dir="ops/", + gen_main=clean_dep("//tensorflow/java:java_ops_gentool"), + visibility=["//tensorflow/java:__pkg__"]): + + gen_tools = [] + gen_cmds = ["rm -rf $(@D)"] # Always start from fresh when generating source files + + # Construct an op generator binary for each ops library. + for ops_lib in ops_libs: + gen_tool = out_dir + ops_lib + "_gentool" + + native.cc_binary( + name=gen_tool, + copts=tf_copts(), + linkopts=["-lm"], + linkstatic=1, # Faster to link this one-time-use binary dynamically + deps=[gen_main, ops_libs_pkg + ":" + ops_lib + "_op_lib"]) + + gen_tools += [":" + gen_tool] + gen_cmds += ["$(location :" + gen_tool + ") $(@D) " + ops_lib] + + # Generate a source archive containing generated code for these ops. + gen_srcjar = out_dir + name + ".srcjar" + gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) ."] + + native.genrule( + name=name, + srcs=["@local_jdk//:jar"], + outs=[gen_srcjar], + tools=gen_tools, + cmd='&&'.join(gen_cmds)) + + # Define a bazel macro that creates cc_test for tensorflow. # TODO(opensource): we need to enable this to work around the hidden symbol # __cudaRegisterFatBinary error. Need more investigations. -- GitLab From 7dcc1ab72e20facff8a7b25ae714e6f3e71e9682 Mon Sep 17 00:00:00 2001 From: "karl@kubx.ca" Date: Mon, 14 Aug 2017 23:49:29 -0400 Subject: [PATCH 219/294] Integrating comments from the 1st code review round. --- tensorflow/java/BUILD | 47 +++++++------- tensorflow/java/src/gen/cc/gen_util.h | 46 -------------- tensorflow/java/src/gen/cc/op_gen_main.cc | 61 +++++++++++++++++++ .../java/src/gen/cc/{ops => }/op_generator.cc | 33 +++++----- tensorflow/java/src/gen/cc/op_generator.h | 59 ++++++++++++++++++ tensorflow/java/src/gen/cc/ops/main.cc | 33 ---------- tensorflow/java/src/gen/cc/ops/op_generator.h | 37 ----------- tensorflow/java/src/gen/gen_ops.bzl | 53 ++++++++++++++++ tensorflow/tensorflow.bzl | 51 ---------------- 9 files changed, 212 insertions(+), 208 deletions(-) delete mode 100644 tensorflow/java/src/gen/cc/gen_util.h create mode 100644 tensorflow/java/src/gen/cc/op_gen_main.cc rename tensorflow/java/src/gen/cc/{ops => }/op_generator.cc (52%) create mode 100644 tensorflow/java/src/gen/cc/op_generator.h delete mode 100644 tensorflow/java/src/gen/cc/ops/main.cc delete mode 100644 tensorflow/java/src/gen/cc/ops/op_generator.h create mode 100644 tensorflow/java/src/gen/gen_ops.bzl diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 50dca9270c..11c5a2d302 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -5,8 +5,9 @@ package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # Apache 2.0 -load("build_defs", "JAVACOPTS") -load("//tensorflow:tensorflow.bzl", "tf_copts", "tf_gen_op_wrappers_java") +load(":build_defs.bzl", "JAVACOPTS") +load(":src/gen/gen_ops.bzl", "java_op_gen_srcjar") +load("//tensorflow:tensorflow.bzl", "tf_copts") java_library( name = "tensorflow", @@ -36,15 +37,16 @@ filegroup( filegroup( name = "java_op_sources", srcs = glob(["src/main/java/org/tensorflow/op/**/*.java"]) + [ - ":java_op_gensources", + ":java_op_gen_sources", ], visibility = [ "//tensorflow/java:__pkg__", ], ) -tf_gen_op_wrappers_java( - name = "java_op_gensources", +java_op_gen_srcjar( + name = "java_op_gen_sources", + gen_tool = "java_op_gen_tool", ops_libs = [ "array_ops", "candidate_sampling_ops", @@ -67,6 +69,24 @@ tf_gen_op_wrappers_java( ], ) +# Build the gen tool as a library, as it will be linked to a core/ops binary +# file before making it an executable. See java_op_gen_srcjar(). +cc_library( + name = "java_op_gen_tool", + srcs = glob([ + "src/gen/cc/*.h", + "src/gen/cc/*.cc", + ]), + copts = tf_copts(), + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:proto_text", + "//tensorflow/core:protos_all_cc", + ], +) + java_library( name = "testutil", testonly = 1, @@ -309,20 +329,3 @@ filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) - -cc_library( - name = "java_ops_gentool", - srcs = glob([ - "src/gen/cc/gen_util.h", - "src/gen/cc/ops/*.h", - "src/gen/cc/ops/*.cc", - ]), - copts = tf_copts(), - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:proto_text", - "//tensorflow/core:protos_all_cc", - ], -) diff --git a/tensorflow/java/src/gen/cc/gen_util.h b/tensorflow/java/src/gen/cc/gen_util.h deleted file mode 100644 index b6c419dcf8..0000000000 --- a/tensorflow/java/src/gen/cc/gen_util.h +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_GEN_UTIL_H_ -#define TENSORFLOW_JAVA_SRC_GEN_CC_GEN_UTIL_H_ - -#include - -namespace tensorflow { -namespace gen_util { - -inline std::string StripChar(const std::string& str, const char old_char) { - std::string ret; - for (const char& c : str) { - if (c != old_char) { - ret.push_back(c); - } - } - return ret; -} - -inline std::string ReplaceChar(const std::string& str, const char old_char, - const char new_char) { - std::string ret; - for (const char& c : str) { - ret.push_back(c == old_char ? new_char : c); - } - return ret; -} - -} // namespace gen_util -} // namespace tensorflow - -#endif // TENSORFLOW_JAVA_SRC_GEN_CC_GEN_UTIL_H_ diff --git a/tensorflow/java/src/gen/cc/op_gen_main.cc b/tensorflow/java/src/gen/cc/op_gen_main.cc new file mode 100644 index 0000000000..16cd3e8c71 --- /dev/null +++ b/tensorflow/java/src/gen/cc/op_gen_main.cc @@ -0,0 +1,61 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + ==============================================================================*/ + +#include +#include + +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/util/command_line_flags.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/java/src/gen/cc/op_generator.h" + +namespace tensorflow { +namespace op_gen { + +const char kUsageHeader[] = + "\n\nGenerator of operation wrappers in Java.\n\n" + "This executable generates wrappers for all operations registered in the\n" + "ops file it has been linked to (i.e. one of the /core/ops/*.o binaries).\n" + "Generated files are output to the path provided as an argument, under\n" + "their appropriate package and using a maven-style directory layout.\n\n"; + +} // namespace op_gen +} // namespace tensorflow + +int main(int argc, char* argv[]) { + tensorflow::string ops_file; + tensorflow::string output_dir; + std::vector flag_list = { + tensorflow::Flag("file", &ops_file, + "name of the ops file linked to this executable"), + tensorflow::Flag("output", &output_dir, + "base directory where to output generated files") + }; + tensorflow::string usage = tensorflow::op_gen::kUsageHeader; + usage += tensorflow::Flags::Usage(argv[0], flag_list); + bool parsed_flags_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); + QCHECK(parsed_flags_ok && !ops_file.empty() && !output_dir.empty()) << usage; + tensorflow::port::InitMain(usage.c_str(), &argc, &argv); + + tensorflow::OpGenerator generator(tensorflow::Env::Default(), output_dir); + tensorflow::OpList ops; + tensorflow::OpRegistry::Global()->Export(true, &ops); + tensorflow::Status status = generator.Run(ops_file, ops); + TF_QCHECK_OK(status); + + return 0; +} diff --git a/tensorflow/java/src/gen/cc/ops/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc similarity index 52% rename from tensorflow/java/src/gen/cc/ops/op_generator.cc rename to tensorflow/java/src/gen/cc/op_generator.cc index 34b97948af..bda1f68abc 100644 --- a/tensorflow/java/src/gen/cc/ops/op_generator.cc +++ b/tensorflow/java/src/gen/cc/op_generator.cc @@ -15,38 +15,33 @@ limitations under the License. #include -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow/java/src/gen/cc/gen_util.h" -#include "tensorflow/java/src/gen/cc/ops/op_generator.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/java/src/gen/cc/op_generator.h" namespace tensorflow { -OpGenerator::OpGenerator(const std::string& ops_lib) - : ops_lib(ops_lib) { - - const std::string& lib_name = ops_lib.substr(0, ops_lib.find_last_of('_')); - package_name = gen_util::StripChar("org.tensorflow.op." + lib_name, '_'); +OpGenerator::OpGenerator(Env* env, const string& output_dir) + : env(env), output_path(output_dir + "/src/main/java/") { } OpGenerator::~OpGenerator() {} -int OpGenerator::Run(const std::string& output_path) { - tensorflow::Env* env = tensorflow::Env::Default(); - std::string package_path(output_path); - package_path += gen_util::ReplaceChar(package_name, '.', '/'); +Status OpGenerator::Run(const string& ops_file, const OpList& ops) { + const string& lib_name = ops_file.substr(0, ops_file.find_last_of('_')); + const string package_name = + str_util::StringReplace("org.tensorflow.op." + lib_name, "_", "", true); + const string package_path = + output_path + str_util::StringReplace(package_name, ".", "/", true); if (!env->FileExists(package_path).ok()) { TF_CHECK_OK(env->RecursivelyCreateDir(package_path)); } - OpList ops; - OpRegistry::Global()->Export(true, &ops); - - // TODO(karllessard) generate wrappers from collected ops + LOG(INFO) << "Generating Java wrappers for \"" << lib_name << "\" operations"; + // TODO(karllessard) generate wrappers from list of ops - return 0; + return Status::OK(); } } // namespace tensorflow diff --git a/tensorflow/java/src/gen/cc/op_generator.h b/tensorflow/java/src/gen/cc/op_generator.h new file mode 100644 index 0000000000..b0d8cb05af --- /dev/null +++ b/tensorflow/java/src/gen/cc/op_generator.h @@ -0,0 +1,59 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_OP_GENERATOR_H_ +#define TENSORFLOW_JAVA_SRC_GEN_CC_OP_GENERATOR_H_ + +#include + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +/// \brief A generator of Java operation wrappers. +/// +/// Such generator is normally ran only once per executable, outputting +/// wrappers for the ops library it has been linked with. Nonetheless, +/// it is designed to support multiple runs, giving a different list of +/// operations on each cycle. +class OpGenerator { + public: + /// \brief Create a new generator, giving an environment and an + /// output directory path. + explicit OpGenerator(Env* env, const string& output_dir); + virtual ~OpGenerator(); + + /// \brief Generates wrappers for the given list of 'ops'. + /// + /// The list of operations should be issued from the library whose + /// file name starts with 'ops_file' (see /core/ops/*.cc). + /// + /// Generated files are output under this directory: + /// /src/main/java/org/tensorflow/java/op/ + /// where + /// 'output_dir' is the directory passed in the constructor and + /// 'group' is extracted from the 'ops_file' name + Status Run(const string& ops_file, const OpList& ops); + + private: + Env* env; + const string output_path; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_JAVA_SRC_GEN_CC_OP_GENERATOR_H_ diff --git a/tensorflow/java/src/gen/cc/ops/main.cc b/tensorflow/java/src/gen/cc/ops/main.cc deleted file mode 100644 index eff93e9ee5..0000000000 --- a/tensorflow/java/src/gen/cc/ops/main.cc +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - ==============================================================================*/ - -#include - -#include "tensorflow/core/platform/init_main.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/java/src/gen/cc/ops/op_generator.h" - -int main(int argc, char* argv[]) { - tensorflow::port::InitMain(argv[0], &argc, &argv); - const std::string& output_dir = argv[1]; - const std::string& ops_lib = argv[2]; - - LOG(INFO) << "Generating Java operation wrappers for \"" - << ops_lib << "\" library"; - - tensorflow::OpGenerator generator(ops_lib); - - return generator.Run(output_dir + "/src/main/java/"); -} diff --git a/tensorflow/java/src/gen/cc/ops/op_generator.h b/tensorflow/java/src/gen/cc/ops/op_generator.h deleted file mode 100644 index fc52d1ac18..0000000000 --- a/tensorflow/java/src/gen/cc/ops/op_generator.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_OPS_OP_GENERATOR_H_ -#define TENSORFLOW_JAVA_SRC_GEN_CC_OPS_OP_GENERATOR_H_ - -#include - -namespace tensorflow { - -class OpGenerator { - public: - explicit OpGenerator(const std::string& ops_lib); - virtual ~OpGenerator(); - - int Run(const std::string& output_path); - - private: - const std::string ops_lib; - std::string package_name; -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_JAVA_SRC_GEN_CC_OPS_OP_GENERATOR_H_ diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl new file mode 100644 index 0000000000..e881c35d4d --- /dev/null +++ b/tensorflow/java/src/gen/gen_ops.bzl @@ -0,0 +1,53 @@ +# -*- Python -*- + +load("//tensorflow:tensorflow.bzl", "tf_copts") + +# Given a list of "ops_libs" (a list of files in the core/ops directory +# without their .cc extensions), generate Java wrapper code for all operations +# found in the ops files. +# Then, combine all those source files into a single archive (.srcjar). +# +# For example: +# java_op_gen_srcjar("gen_sources", "gen_tool", [ "array_ops", "math_ops" ]) +# +# will create a genrule named "gen_sources" that first generate source files: +# ops/src/main/java/org/tensorflow/op/array/*.java +# ops/src/main/java/org/tensorflow/op/math/*.java +# +# and then archive those source files in: +# ops/gen_sources.srcjar +# +def java_op_gen_srcjar(name, + gen_tool, + ops_libs=[], + ops_libs_pkg="//tensorflow/core", + out_dir="ops/", + visibility=["//tensorflow/java:__pkg__"]): + + gen_tools = [] + gen_cmds = ["rm -rf $(@D)"] # Always start from fresh when generating source files + + # Construct an op generator binary for each ops library. + for ops_lib in ops_libs: + out_gen_tool = out_dir + ops_lib + "_gen_tool" + + native.cc_binary( + name=out_gen_tool, + copts=tf_copts(), + linkopts=["-lm"], + linkstatic=1, # Faster to link this one-time-use binary dynamically + deps=[gen_tool, ops_libs_pkg + ":" + ops_lib + "_op_lib"]) + + gen_tools += [":" + out_gen_tool] + gen_cmds += ["$(location :" + out_gen_tool + ") --output=$(@D) --file=" + ops_lib] + + # Generate a source archive containing generated code for these ops. + gen_srcjar = out_dir + name + ".srcjar" + gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) ."] + + native.genrule( + name=name, + srcs=["@local_jdk//:jar"], + outs=[gen_srcjar], + tools=gen_tools, + cmd='&&'.join(gen_cmds)) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index efb2d9a494..f0301937fb 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -430,57 +430,6 @@ def tf_gen_op_wrapper_py(name, ],) -# Given a list of "ops_libs" (a list of files in the core/ops directory -# without their .cc extensions), generate Java wrapper code for all operations -# found in the ops files. -# Then, combine all those source files into a single archive (.srcjar). -# -# For example: -# tf_gen_op_wrappers_java("java_op_gensources", [ "array_ops", "math_ops" ]) -# -# will create a genrule named "java_op_gensources" that first generate source files: -# ops/src/main/java/org/tensorflow/op/array/*.java -# ops/src/main/java/org/tensorflow/op/math/*.java -# -# and then archive those source files in: -# ops/java_op_gensources.srcjar -# -def tf_gen_op_wrappers_java(name, - ops_libs=[], - ops_libs_pkg="//tensorflow/core", - out_dir="ops/", - gen_main=clean_dep("//tensorflow/java:java_ops_gentool"), - visibility=["//tensorflow/java:__pkg__"]): - - gen_tools = [] - gen_cmds = ["rm -rf $(@D)"] # Always start from fresh when generating source files - - # Construct an op generator binary for each ops library. - for ops_lib in ops_libs: - gen_tool = out_dir + ops_lib + "_gentool" - - native.cc_binary( - name=gen_tool, - copts=tf_copts(), - linkopts=["-lm"], - linkstatic=1, # Faster to link this one-time-use binary dynamically - deps=[gen_main, ops_libs_pkg + ":" + ops_lib + "_op_lib"]) - - gen_tools += [":" + gen_tool] - gen_cmds += ["$(location :" + gen_tool + ") $(@D) " + ops_lib] - - # Generate a source archive containing generated code for these ops. - gen_srcjar = out_dir + name + ".srcjar" - gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) ."] - - native.genrule( - name=name, - srcs=["@local_jdk//:jar"], - outs=[gen_srcjar], - tools=gen_tools, - cmd='&&'.join(gen_cmds)) - - # Define a bazel macro that creates cc_test for tensorflow. # TODO(opensource): we need to enable this to work around the hidden symbol # __cudaRegisterFatBinary error. Need more investigations. -- GitLab From dca9fe248496ead16b38d9b54d080debbe79a992 Mon Sep 17 00:00:00 2001 From: "karl@kubx.ca" Date: Tue, 22 Aug 2017 16:41:48 -0400 Subject: [PATCH 220/294] Integrating comments from the 2nd code review round --- tensorflow/java/BUILD | 7 ++-- tensorflow/java/src/gen/cc/op_gen_main.cc | 47 ++++++++++++++++------ tensorflow/java/src/gen/cc/op_generator.cc | 37 +++++++++++++---- tensorflow/java/src/gen/cc/op_generator.h | 24 ++++------- tensorflow/java/src/gen/gen_ops.bzl | 22 ++++++---- 5 files changed, 89 insertions(+), 48 deletions(-) diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 11c5a2d302..ee07fc4813 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # Apache 2.0 load(":build_defs.bzl", "JAVACOPTS") -load(":src/gen/gen_ops.bzl", "java_op_gen_srcjar") +load(":src/gen/gen_ops.bzl", "tf_java_op_gen_srcjar") load("//tensorflow:tensorflow.bzl", "tf_copts") java_library( @@ -44,8 +44,9 @@ filegroup( ], ) -java_op_gen_srcjar( +tf_java_op_gen_srcjar( name = "java_op_gen_sources", + gen_base_package = "org.tensorflow.op", gen_tool = "java_op_gen_tool", ops_libs = [ "array_ops", @@ -70,7 +71,7 @@ java_op_gen_srcjar( ) # Build the gen tool as a library, as it will be linked to a core/ops binary -# file before making it an executable. See java_op_gen_srcjar(). +# file before making it an executable. See tf_java_op_gen_srcjar(). cc_library( name = "java_op_gen_tool", srcs = glob([ diff --git a/tensorflow/java/src/gen/cc/op_gen_main.cc b/tensorflow/java/src/gen/cc/op_gen_main.cc index 16cd3e8c71..c396d78f15 100644 --- a/tensorflow/java/src/gen/cc/op_gen_main.cc +++ b/tensorflow/java/src/gen/cc/op_gen_main.cc @@ -11,7 +11,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - ==============================================================================*/ +==============================================================================*/ #include #include @@ -19,6 +19,7 @@ #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/util/command_line_flags.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/java/src/gen/cc/op_generator.h" @@ -28,33 +29,53 @@ namespace op_gen { const char kUsageHeader[] = "\n\nGenerator of operation wrappers in Java.\n\n" - "This executable generates wrappers for all operations registered in the\n" - "ops file it has been linked to (i.e. one of the /core/ops/*.o binaries).\n" - "Generated files are output to the path provided as an argument, under\n" - "their appropriate package and using a maven-style directory layout.\n\n"; + "This executable generates wrappers for all registered operations it has " + "been compiled with. A wrapper exposes an intuitive and strongly-typed\n" + "interface for building its underlying operation and linking it into a " + "graph.\n\n" + "Operation wrappers are generated under the path specified by the " + "'--output_dir' argument. This path can be absolute or relative to the\n" + "current working directory and will be created if it does not exists.\n\n" + "The '--lib_name' argument is used to classify the set of operations. If " + "the chosen name contains more than one word, it must be provided in \n" + "snake_case. This value is declined into other meaningful names, such as " + "the group and package of the generated operations. For example,\n" + "'--lib_name=my_lib' generates the operations under the " + "'org.tensorflow.op.mylib' package and add them to the 'myLib()' operator\n" + "group.\n\n" + "Note that the operator group assigned to the generated wrappers is just " + "a annotation tag at this stage. Operations will not be available through\n" + "the Ops API as a group until the generated classes are compiled using an " + "appropriate annotation processor.\n\n" + "Finally, the '--base_package' overrides the default parent package " + "under which the generated subpackage and classes are to be located.\n\n"; } // namespace op_gen } // namespace tensorflow int main(int argc, char* argv[]) { - tensorflow::string ops_file; + tensorflow::string lib_name; tensorflow::string output_dir; + tensorflow::string base_package = "org.tensorflow.op"; std::vector flag_list = { - tensorflow::Flag("file", &ops_file, - "name of the ops file linked to this executable"), - tensorflow::Flag("output", &output_dir, - "base directory where to output generated files") + tensorflow::Flag("output_dir", &output_dir, + "Root directory into which output files are generated"), + tensorflow::Flag("lib_name", &lib_name, + "A name, in snake_case, used to classify this set of operations"), + tensorflow::Flag("base_package", &base_package, + "Package parent to the generated subpackage and classes") }; tensorflow::string usage = tensorflow::op_gen::kUsageHeader; usage += tensorflow::Flags::Usage(argv[0], flag_list); bool parsed_flags_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); - QCHECK(parsed_flags_ok && !ops_file.empty() && !output_dir.empty()) << usage; tensorflow::port::InitMain(usage.c_str(), &argc, &argv); + QCHECK(parsed_flags_ok && !lib_name.empty() && !output_dir.empty()) << usage; - tensorflow::OpGenerator generator(tensorflow::Env::Default(), output_dir); + tensorflow::OpGenerator generator; tensorflow::OpList ops; tensorflow::OpRegistry::Global()->Export(true, &ops); - tensorflow::Status status = generator.Run(ops_file, ops); + tensorflow::Status status = + generator.Run(ops, lib_name, base_package, output_dir); TF_QCHECK_OK(status); return 0; diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc index bda1f68abc..f755f982e4 100644 --- a/tensorflow/java/src/gen/cc/op_generator.cc +++ b/tensorflow/java/src/gen/cc/op_generator.cc @@ -20,25 +20,46 @@ limitations under the License. #include "tensorflow/java/src/gen/cc/op_generator.h" namespace tensorflow { +namespace op_gen { -OpGenerator::OpGenerator(Env* env, const string& output_dir) - : env(env), output_path(output_dir + "/src/main/java/") { +string CamelCase(const string& str, char delimiter, bool upper) { + string result; + bool cap = upper; + for (string::const_iterator it = str.begin(); it != str.end(); ++it) { + const char c = *it; + if (c == delimiter) { + cap = true; + } else if (cap) { + result += toupper(c); + cap = false; + } else { + result += c; + } + } + return result; +} + +} // namespace op_gen + +OpGenerator::OpGenerator() + : env(Env::Default()) { } OpGenerator::~OpGenerator() {} -Status OpGenerator::Run(const string& ops_file, const OpList& ops) { - const string& lib_name = ops_file.substr(0, ops_file.find_last_of('_')); - const string package_name = - str_util::StringReplace("org.tensorflow.op." + lib_name, "_", "", true); +Status OpGenerator::Run(const OpList& ops, const string& lib_name, + const string& base_package, const string& output_dir) { + const string package = + base_package + '.' + str_util::StringReplace(lib_name, "_", "", true); const string package_path = - output_path + str_util::StringReplace(package_name, ".", "/", true); + output_dir + '/' + str_util::StringReplace(package, ".", "/", true); + const string group = op_gen::CamelCase(lib_name, '_', false); if (!env->FileExists(package_path).ok()) { TF_CHECK_OK(env->RecursivelyCreateDir(package_path)); } - LOG(INFO) << "Generating Java wrappers for \"" << lib_name << "\" operations"; + LOG(INFO) << "Generating Java wrappers for '" << lib_name << "' operations"; // TODO(karllessard) generate wrappers from list of ops return Status::OK(); diff --git a/tensorflow/java/src/gen/cc/op_generator.h b/tensorflow/java/src/gen/cc/op_generator.h index b0d8cb05af..98a1f8d534 100644 --- a/tensorflow/java/src/gen/cc/op_generator.h +++ b/tensorflow/java/src/gen/cc/op_generator.h @@ -27,31 +27,23 @@ namespace tensorflow { /// \brief A generator of Java operation wrappers. /// /// Such generator is normally ran only once per executable, outputting -/// wrappers for the ops library it has been linked with. Nonetheless, -/// it is designed to support multiple runs, giving a different list of -/// operations on each cycle. +/// wrappers for the all registered operations it has been compiled with. +/// Nonetheless, it is designed to support multiple runs, giving a different +/// list of operations on each cycle. class OpGenerator { public: - /// \brief Create a new generator, giving an environment and an - /// output directory path. - explicit OpGenerator(Env* env, const string& output_dir); + OpGenerator(); virtual ~OpGenerator(); /// \brief Generates wrappers for the given list of 'ops'. /// - /// The list of operations should be issued from the library whose - /// file name starts with 'ops_file' (see /core/ops/*.cc). - /// - /// Generated files are output under this directory: - /// /src/main/java/org/tensorflow/java/op/ - /// where - /// 'output_dir' is the directory passed in the constructor and - /// 'group' is extracted from the 'ops_file' name - Status Run(const string& ops_file, const OpList& ops); + /// Output files are generated in //, + /// where 'lib_package' is derived from 'lib_name'. + Status Run(const OpList& ops, const string& lib_name, + const string& base_package, const string& output_dir); private: Env* env; - const string output_path; }; } // namespace tensorflow diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl index e881c35d4d..5ef01d68df 100644 --- a/tensorflow/java/src/gen/gen_ops.bzl +++ b/tensorflow/java/src/gen/gen_ops.bzl @@ -8,7 +8,7 @@ load("//tensorflow:tensorflow.bzl", "tf_copts") # Then, combine all those source files into a single archive (.srcjar). # # For example: -# java_op_gen_srcjar("gen_sources", "gen_tool", [ "array_ops", "math_ops" ]) +# tf_java_op_gen_srcjar("gen_sources", "gen_tool", [ "array_ops", "math_ops" ]) # # will create a genrule named "gen_sources" that first generate source files: # ops/src/main/java/org/tensorflow/op/array/*.java @@ -17,18 +17,21 @@ load("//tensorflow:tensorflow.bzl", "tf_copts") # and then archive those source files in: # ops/gen_sources.srcjar # -def java_op_gen_srcjar(name, - gen_tool, - ops_libs=[], - ops_libs_pkg="//tensorflow/core", - out_dir="ops/", - visibility=["//tensorflow/java:__pkg__"]): +def tf_java_op_gen_srcjar(name, + gen_tool, + gen_base_package, + ops_libs=[], + ops_libs_pkg="//tensorflow/core", + out_dir="ops/", + out_src_dir="src/main/java/", + visibility=["//tensorflow/java:__pkg__"]): gen_tools = [] gen_cmds = ["rm -rf $(@D)"] # Always start from fresh when generating source files # Construct an op generator binary for each ops library. for ops_lib in ops_libs: + gen_lib = ops_lib[:ops_lib.rfind('_')] out_gen_tool = out_dir + ops_lib + "_gen_tool" native.cc_binary( @@ -39,7 +42,10 @@ def java_op_gen_srcjar(name, deps=[gen_tool, ops_libs_pkg + ":" + ops_lib + "_op_lib"]) gen_tools += [":" + out_gen_tool] - gen_cmds += ["$(location :" + out_gen_tool + ") --output=$(@D) --file=" + ops_lib] + gen_cmds += ["$(location :" + out_gen_tool + ")" + + " --output_dir=$(@D)/" + out_src_dir + + " --lib_name=" + gen_lib + + " --base_package=" + gen_base_package] # Generate a source archive containing generated code for these ops. gen_srcjar = out_dir + name + ".srcjar" -- GitLab From f15e0e003c6cb59f9c74e1fc2a662fac65c3b993 Mon Sep 17 00:00:00 2001 From: Karl Lessard Date: Fri, 25 Aug 2017 10:46:24 -0400 Subject: [PATCH 221/294] Small update in usage string --- tensorflow/java/src/gen/cc/op_gen_main.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/java/src/gen/cc/op_gen_main.cc b/tensorflow/java/src/gen/cc/op_gen_main.cc index c396d78f15..938b62cdb0 100644 --- a/tensorflow/java/src/gen/cc/op_gen_main.cc +++ b/tensorflow/java/src/gen/cc/op_gen_main.cc @@ -45,8 +45,8 @@ const char kUsageHeader[] = "group.\n\n" "Note that the operator group assigned to the generated wrappers is just " "a annotation tag at this stage. Operations will not be available through\n" - "the Ops API as a group until the generated classes are compiled using an " - "appropriate annotation processor.\n\n" + "the 'org.tensorflow.op.Ops' API as a group until the generated classes " + "are compiled using an appropriate annotation processor.\n\n" "Finally, the '--base_package' overrides the default parent package " "under which the generated subpackage and classes are to be located.\n\n"; -- GitLab From 21082bd7e366f1853c282be1849f0a0e92649517 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Sun, 27 Aug 2017 21:46:30 -0700 Subject: [PATCH 222/294] Use an anonymous namespace for helper functions --- tensorflow/java/src/gen/cc/op_generator.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc index f755f982e4..814a08c6cc 100644 --- a/tensorflow/java/src/gen/cc/op_generator.cc +++ b/tensorflow/java/src/gen/cc/op_generator.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/java/src/gen/cc/op_generator.h" namespace tensorflow { -namespace op_gen { +namespace { string CamelCase(const string& str, char delimiter, bool upper) { string result; @@ -39,7 +39,7 @@ string CamelCase(const string& str, char delimiter, bool upper) { return result; } -} // namespace op_gen +} // namespace OpGenerator::OpGenerator() : env(Env::Default()) { @@ -53,7 +53,7 @@ Status OpGenerator::Run(const OpList& ops, const string& lib_name, base_package + '.' + str_util::StringReplace(lib_name, "_", "", true); const string package_path = output_dir + '/' + str_util::StringReplace(package, ".", "/", true); - const string group = op_gen::CamelCase(lib_name, '_', false); + const string group = CamelCase(lib_name, '_', false); if (!env->FileExists(package_path).ok()) { TF_CHECK_OK(env->RecursivelyCreateDir(package_path)); -- GitLab From 2aa55cf96e5a89032a13e4aa5d25a2ad0cd13bef Mon Sep 17 00:00:00 2001 From: Karl Lessard Date: Fri, 1 Sep 2017 13:53:22 -0400 Subject: [PATCH 223/294] Fix small typo --- tensorflow/java/src/gen/cc/op_gen_main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/java/src/gen/cc/op_gen_main.cc b/tensorflow/java/src/gen/cc/op_gen_main.cc index 938b62cdb0..bc698124bf 100644 --- a/tensorflow/java/src/gen/cc/op_gen_main.cc +++ b/tensorflow/java/src/gen/cc/op_gen_main.cc @@ -44,7 +44,7 @@ const char kUsageHeader[] = "'org.tensorflow.op.mylib' package and add them to the 'myLib()' operator\n" "group.\n\n" "Note that the operator group assigned to the generated wrappers is just " - "a annotation tag at this stage. Operations will not be available through\n" + "an annotation tag at this stage. Operations will not be available through\n" "the 'org.tensorflow.op.Ops' API as a group until the generated classes " "are compiled using an appropriate annotation processor.\n\n" "Finally, the '--base_package' overrides the default parent package " -- GitLab From 1209491913def44650d6457c60a6e41d56de3306 Mon Sep 17 00:00:00 2001 From: Karl Lessard Date: Fri, 1 Sep 2017 14:01:01 -0400 Subject: [PATCH 224/294] Add mandatory "gen_base_package" argument in the usage example --- tensorflow/java/src/gen/gen_ops.bzl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl index 5ef01d68df..c09503ce8a 100644 --- a/tensorflow/java/src/gen/gen_ops.bzl +++ b/tensorflow/java/src/gen/gen_ops.bzl @@ -8,11 +8,11 @@ load("//tensorflow:tensorflow.bzl", "tf_copts") # Then, combine all those source files into a single archive (.srcjar). # # For example: -# tf_java_op_gen_srcjar("gen_sources", "gen_tool", [ "array_ops", "math_ops" ]) +# tf_java_op_gen_srcjar("gen_sources", "gen_tool", "my.package", [ "array_ops", "math_ops" ]) # # will create a genrule named "gen_sources" that first generate source files: -# ops/src/main/java/org/tensorflow/op/array/*.java -# ops/src/main/java/org/tensorflow/op/math/*.java +# ops/src/main/java/my/package/array/*.java +# ops/src/main/java/my/package/math/*.java # # and then archive those source files in: # ops/gen_sources.srcjar -- GitLab From 0302320e11c7561cafac1cc279fea87de02b0cf9 Mon Sep 17 00:00:00 2001 From: Eric Liu Date: Mon, 4 Sep 2017 02:42:28 -0700 Subject: [PATCH 225/294] [tpu:profiler] Write RunMetadata of the computation graph to event file, if available. The RunMetadata can be used to annotate HLO graphs with colors based on node compute time. PiperOrigin-RevId: 167477021 --- .../tpu/profiler/capture_tpu_profile.cc | 45 ++++++++++++------- .../contrib/tpu/profiler/tpu_profiler.proto | 6 +++ 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index a0dc15249f..db77b3fa90 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -38,7 +38,9 @@ limitations under the License. #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/util/command_line_flags.h" +#include "tensorflow/core/util/event.pb.h" #include "tensorflow/core/util/events_writer.h" namespace tensorflow { @@ -136,14 +138,39 @@ ProfileResponse Profile(const string& service_addr, int duration_ms) { return response; } -void DumpGraph(StringPiece logdir, StringPiece run, const string& graph_def) { +void DumpGraphEvents(const string& logdir, const string& run, + const ProfileResponse& response) { + int num_graphs = response.computation_graph_size(); + if (response.computation_graph_size() == 0) return; + // The server might generates multiple graphs for one program; we simply + // pick the first one. + if (num_graphs > 1) { + std::cout << num_graphs + << " TPU program variants observed over the profiling period. " + << "One computation graph will be chosen arbitrarily." + << std::endl; + } // The graph plugin expects the graph in //. string run_dir = JoinPath(logdir, strings::StrCat(kGraphRunPrefix, run)); TF_CHECK_OK(Env::Default()->RecursivelyCreateDir(run_dir)); EventsWriter event_writer(JoinPath(run_dir, "events")); Event event; - event.set_graph_def(graph_def); + // Add the computation graph. + event.set_graph_def(response.computation_graph(0).SerializeAsString()); event_writer.WriteEvent(event); + std::cout << "Wrote a HLO graph to " << event_writer.FileName() << std::endl; + + if (response.has_hlo_metadata()) { + tensorflow::TaggedRunMetadata tagged_run_metadata; + tagged_run_metadata.set_tag(run); + tagged_run_metadata.set_run_metadata( + response.hlo_metadata().SerializeAsString()); + tensorflow::Event meta_event; + *meta_event.mutable_tagged_run_metadata() = tagged_run_metadata; + event_writer.WriteEvent(meta_event); + std::cout << "Wrote HLO ops run metadata to " << event_writer.FileName() + << std::endl; + } } } // namespace @@ -186,19 +213,7 @@ int main(int argc, char** argv) { LOG(INFO) << "Converting trace events to TraceViewer JSON."; tensorflow::tpu::DumpTraceToLogDirectory(run_dir, response.encoded_trace()); } - int num_graphs = response.computation_graph_size(); - if (num_graphs > 0) { - // The server might generates multiple graphs for one program; we simply - // pick the first one. - if (num_graphs > 1) { - std::cout << num_graphs - << " TPU program variants observed over the profiling period. " - << "One computation graph will be chosen arbitrarily." - << std::endl; - } - tensorflow::tpu::DumpGraph( - FLAGS_logdir, run, response.computation_graph(0).SerializeAsString()); - } + tensorflow::tpu::DumpGraphEvents(FLAGS_logdir, run, response); if (response.has_op_profile() && (response.op_profile().has_by_program_structure() || response.op_profile().has_by_category())) { diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto index d0a27f1a3d..88e86eca3b 100644 --- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto @@ -2,6 +2,7 @@ syntax = "proto3"; package tensorflow; import "tensorflow/core/framework/graph.proto"; +import "tensorflow/core/protobuf/config.proto"; import "tensorflow/contrib/tpu/profiler/op_profile.proto"; // The TPUProfiler service retrieves performance information about @@ -31,6 +32,10 @@ message ProfileResponse { // Graphs of programs executed on TPUs during the profiling period. repeated GraphDef computation_graph = 2; + // Performance profile that can be used to annotate HLO operations in the + // computation graph. + RunMetadata hlo_metadata = 5; + // Encoded Trace proto message that contains metadata about the trace captured // during the profiling period. Describes the devices and resources that // 'trace_events' refers to. @@ -40,4 +45,5 @@ message ProfileResponse { // If the trace covers multiple programs, the longest-running one is analyzed. // See op_profile.proto for the detailed semantics of the returned profile. tpu.op_profile.Profile op_profile = 4; + // next-field: 6 } -- GitLab From 07356b48e4b374efd406fd142faa77cfa4db05e9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Sep 2017 03:19:53 -0700 Subject: [PATCH 226/294] Exposing launchpad for conv2d backprop, and unify launchpads for conv2d and depthwise_conv to match example in documentation (see ./extend/adding_an_op.md) PiperOrigin-RevId: 167480081 --- .../core/kernels/conv_grad_filter_ops.cc | 690 +++++++++-------- .../core/kernels/conv_grad_input_ops.cc | 716 +++++++++--------- tensorflow/core/kernels/conv_grad_ops.h | 37 + tensorflow/core/kernels/conv_ops.cc | 42 +- tensorflow/core/kernels/conv_ops.h | 32 +- .../core/kernels/depthwise_conv_grad_op.cc | 74 +- tensorflow/core/kernels/depthwise_conv_op.cc | 42 +- tensorflow/core/kernels/depthwise_conv_op.h | 47 ++ .../core/kernels/depthwise_conv_op_gpu.cu.cc | 103 ++- 9 files changed, 922 insertions(+), 861 deletions(-) diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index 65514937f4..8eb705b2e5 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -91,6 +91,20 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; +template +struct LaunchConv2DBackpropInputOp { + void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& out_backprop, const Tensor& input, + int row_stride, int col_stride, const Padding& padding, + Tensor* filter_backprop, TensorFormat data_format) { + const CPUDevice& d = ctx->eigen_device(); + functor::SpatialConvolutionBackwardInput()( + d, filter_backprop->tensor(), input.tensor(), + out_backprop.tensor(), filter_backprop->dim_size(0), + filter_backprop->dim_size(1), row_stride, col_stride); + } +}; + #ifdef TENSORFLOW_USE_LIBXSMM template struct LaunchXsmmBackwardFilter { @@ -237,11 +251,9 @@ class Conv2DFastBackpropFilterOp : public OpKernel { } #endif - functor::SpatialConvolutionBackwardKernel()( - context->eigen_device(), filter_backprop->tensor(), - input.tensor(), out_backprop.tensor(), - dims.spatial_dims[0].filter_size, dims.spatial_dims[1].filter_size, - dims.spatial_dims[0].stride, dims.spatial_dims[1].stride); + LaunchConv2DBackpropInputOp()( + context, false, false, out_backprop, input, dims.spatial_dims[0].stride, + dims.spatial_dims[1].stride, padding_, filter_backprop, data_format_); } private: @@ -495,15 +507,10 @@ class Conv2DSlowBackpropFilterOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_)); use_cudnn_ &= CanUseCudnn(); cudnn_use_autotune_ = CudnnUseAutotune(); - cudnn_disable_conv_1x1_optimization_ = CudnnDisableConv1x1Optimization(); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } void Compute(OpKernelContext* context) override { - using perftools::gputools::dnn::AlgorithmConfig; - using perftools::gputools::dnn::AlgorithmType; - using perftools::gputools::dnn::ProfileResult; - using perftools::gputools::dnn::kDefaultAlgorithm; const Tensor& input = context->input(0); const Tensor& filter_sizes = context->input(1); const Tensor& out_backprop = context->input(2); @@ -512,352 +519,373 @@ class Conv2DSlowBackpropFilterOp : public OpKernel { errors::InvalidArgument( "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ", filter_sizes.dims())); - const TensorShape& input_shape = input.shape(); TensorShape filter_shape; OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( filter_sizes.vec(), &filter_shape)); - ConvBackpropDimensions dims; - OP_REQUIRES_OK(context, - ConvBackpropComputeDimensions( - "Conv2DSlowBackpropFilter", /*num_spatial_dims=*/2, - input.shape(), filter_shape, out_backprop.shape(), - strides_, padding_, data_format_, &dims)); - Tensor* filter_backprop = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, filter_shape, &filter_backprop)); - const int padding_rows = - (padding_ == VALID) - ? 0 - : std::max(0, (dims.spatial_dims[0].output_size - 1) * - dims.spatial_dims[0].stride + - dims.spatial_dims[0].filter_size - - dims.spatial_dims[0].input_size); - const int padding_cols = - (padding_ == VALID) - ? 0 - : std::max(0, (dims.spatial_dims[1].output_size - 1) * - dims.spatial_dims[1].stride + - dims.spatial_dims[1].filter_size - - dims.spatial_dims[1].input_size); - - // TODO(zhengxq): cuDNN only supports equal padding on both sides, so only - // calling it when that is true. Remove this check when (if?) cuDNN starts - // supporting different padding. - bool rows_odd = (padding_rows % 2 != 0); - bool cols_odd = (padding_cols % 2 != 0); - - auto* stream = context->op_device_context()->stream(); - OP_REQUIRES(context, stream, errors::Internal("No GPU stream available.")); - - if (!use_cudnn_) { - context->SetStatus(errors::Unimplemented( - "Conv2DBackprop for GPU is not currently supported " - "without cudnn")); - return; - } + // For now we take the stride from the second and third dimensions only (we + // do not support striding on the batch or depth dimension). + const int stride_rows = GetTensorDim(strides_, data_format_, 'H'); + const int stride_cols = GetTensorDim(strides_, data_format_, 'W'); - if (!cudnn_disable_conv_1x1_optimization_ && - dims.spatial_dims[0].filter_size == 1 && - dims.spatial_dims[1].filter_size == 1 && - dims.spatial_dims[0].stride == 1 && dims.spatial_dims[1].stride == 1 && - data_format_ == FORMAT_NHWC) { - const uint64 m = dims.in_depth; - const uint64 k = dims.batch_size * dims.spatial_dims[0].input_size * - dims.spatial_dims[1].input_size; - const uint64 n = dims.out_depth; - - // The shape of output backprop is - // [batch, out_rows, out_cols, out_depth] - // From cublas's perspective, it is: n x k - auto a_ptr = AsDeviceMemory(out_backprop.template flat().data(), - out_backprop.template flat().size()); - - // The shape of input is - // [batch, in_rows, in_cols, in_depth], - // From cublas's perspective, it is: m x k - auto b_ptr = AsDeviceMemory(input.template flat().data(), - input.template flat().size()); - - // the shape of the filter backprop from the conv_2d should be - // [1, 1, in_depth, out_depth] - // From cublas's perspective, it is: n x m - auto c_ptr = AsDeviceMemory(filter_backprop->template flat().data(), - filter_backprop->template flat().size()); - - bool blas_launch_status = - stream - ->ThenBlasGemm(perftools::gputools::blas::Transpose::kNoTranspose, - perftools::gputools::blas::Transpose::kTranspose, - n, m, k, 1.0f, a_ptr, n, b_ptr, m, 0.0f, &c_ptr, n) - .ok(); - if (!blas_launch_status) { - context->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, - ", n=", n, ", k=", k)); - } - return; - } else if (dims.spatial_dims[0].filter_size == - dims.spatial_dims[0].input_size && - dims.spatial_dims[1].filter_size == - dims.spatial_dims[1].input_size && - padding_ == VALID && data_format_ == FORMAT_NHWC) { - // The input data and filter have the same height/width, so call cublas - // directly. - const uint64 m = dims.spatial_dims[0].input_size * - dims.spatial_dims[1].input_size * dims.in_depth; - const uint64 k = dims.batch_size; - const uint64 n = dims.out_depth; - - auto a_ptr = AsDeviceMemory(input.template flat().data(), - input.template flat().size()); - auto b_ptr = AsDeviceMemory(out_backprop.template flat().data(), - out_backprop.template flat().size()); - auto c_ptr = AsDeviceMemory(filter_backprop->template flat().data(), - filter_backprop->template flat().size()); - - bool blas_launch_status = - stream - ->ThenBlasGemm(perftools::gputools::blas::Transpose::kNoTranspose, - perftools::gputools::blas::Transpose::kTranspose, - n, m, k, 1.0f, b_ptr, n, a_ptr, m, 0.0f, &c_ptr, n) - .ok(); - if (!blas_launch_status) { - context->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, - ", n=", n, ", k=", k)); - } - return; - } + launcher_(context, use_cudnn_, cudnn_use_autotune_, out_backprop, input, + stride_rows, stride_cols, padding_, filter_backprop, + data_format_); + } - Tensor compatible_input; - if (rows_odd || cols_odd) { - // If a padding dimension is odd, we have one more element on the right - // side or the bottom side. This is unsupported in cudnn. Therefore, - // we pad that extra element and make it compatible. - OP_REQUIRES_OK( - context, - context->allocate_temp( - DataTypeToEnum::value, - ShapeFromFormat(data_format_, dims.batch_size, - dims.spatial_dims[0].input_size + rows_odd, - dims.spatial_dims[1].input_size + cols_odd, - dims.in_depth), - &compatible_input)); - - functor::PadInput()( - context->template eigen_device(), - To32Bit(input.tensor()), {{0, 0}}, {{rows_odd, cols_odd}}, - To32Bit(compatible_input.tensor()), data_format_); - } else { - compatible_input = input; + private: + std::vector strides_; + Padding padding_; + bool use_cudnn_; + TensorFormat data_format_; + LaunchConv2DBackpropFilterOp launcher_; + bool cudnn_use_autotune_; + + TF_DISALLOW_COPY_AND_ASSIGN(Conv2DSlowBackpropFilterOp); +}; + +template +void LaunchConv2DBackpropFilterOp::operator()( + OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& out_backprop, const Tensor& input, int row_stride, + int col_stride, const Padding& padding, Tensor* filter_backprop, + TensorFormat data_format) { + using perftools::gputools::dnn::AlgorithmConfig; + using perftools::gputools::dnn::AlgorithmType; + using perftools::gputools::dnn::ProfileResult; + + std::vector strides(4, 1); + strides[GetTensorDimIndex(data_format, 'H')] = row_stride; + strides[GetTensorDimIndex(data_format, 'W')] = col_stride; + TensorShape filter_shape = filter_backprop->shape(); + + ConvBackpropDimensions dims; + OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions( + "Conv2DSlowBackpropFilter", /*num_spatial_dims=*/2, + input.shape(), filter_shape, out_backprop.shape(), + strides, padding, data_format, &dims)); + + const int padding_rows = + (padding == VALID) + ? 0 + : std::max(0, (dims.spatial_dims[0].output_size - 1) * + dims.spatial_dims[0].stride + + dims.spatial_dims[0].filter_size - + dims.spatial_dims[0].input_size); + const int padding_cols = + (padding == VALID) + ? 0 + : std::max(0, (dims.spatial_dims[1].output_size - 1) * + dims.spatial_dims[1].stride + + dims.spatial_dims[1].filter_size - + dims.spatial_dims[1].input_size); + + // TODO(zhengxq): cuDNN only supports equal padding on both sides, so only + // calling it when that is true. Remove this check when (if?) cuDNN starts + // supporting different padding. + bool rows_odd = (padding_rows % 2 != 0); + bool cols_odd = (padding_cols % 2 != 0); + + auto* stream = ctx->op_device_context()->stream(); + OP_REQUIRES(ctx, stream, errors::Internal("No GPU stream available.")); + + if (!use_cudnn) { + ctx->SetStatus(errors::Unimplemented( + "Conv2DBackprop for GPU is not currently supported " + "without cudnn")); + return; + } + + bool cudnn_disable_conv_1x1_optimization_ = CudnnDisableConv1x1Optimization(); + if (!cudnn_disable_conv_1x1_optimization_ && + dims.spatial_dims[0].filter_size == 1 && + dims.spatial_dims[1].filter_size == 1 && + dims.spatial_dims[0].stride == 1 && dims.spatial_dims[1].stride == 1 && + data_format == FORMAT_NHWC) { + const uint64 m = dims.in_depth; + const uint64 k = dims.batch_size * dims.spatial_dims[0].input_size * + dims.spatial_dims[1].input_size; + const uint64 n = dims.out_depth; + + // The shape of output backprop is + // [batch, out_rows, out_cols, out_depth] + // From cublas's perspective, it is: n x k + auto a_ptr = AsDeviceMemory(out_backprop.template flat().data(), + out_backprop.template flat().size()); + + // The shape of input is + // [batch, in_rows, in_cols, in_depth], + // From cublas's perspective, it is: m x k + auto b_ptr = AsDeviceMemory(input.template flat().data(), + input.template flat().size()); + + // the shape of the filter backprop from the conv_2d should be + // [1, 1, in_depth, out_depth] + // From cublas's perspective, it is: n x m + auto c_ptr = AsDeviceMemory(filter_backprop->template flat().data(), + filter_backprop->template flat().size()); + + bool blas_launch_status = + stream + ->ThenBlasGemm(perftools::gputools::blas::Transpose::kNoTranspose, + perftools::gputools::blas::Transpose::kTranspose, n, + m, k, 1.0f, a_ptr, n, b_ptr, m, 0.0f, &c_ptr, n) + .ok(); + if (!blas_launch_status) { + ctx->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, + ", n=", n, ", k=", k)); } + return; + } else if (dims.spatial_dims[0].filter_size == + dims.spatial_dims[0].input_size && + dims.spatial_dims[1].filter_size == + dims.spatial_dims[1].input_size && + padding == VALID && data_format == FORMAT_NHWC) { + // The input data and filter have the same height/width, so call cublas + // directly. + const uint64 m = dims.spatial_dims[0].input_size * + dims.spatial_dims[1].input_size * dims.in_depth; + const uint64 k = dims.batch_size; + const uint64 n = dims.out_depth; + + auto a_ptr = AsDeviceMemory(input.template flat().data(), + input.template flat().size()); + auto b_ptr = AsDeviceMemory(out_backprop.template flat().data(), + out_backprop.template flat().size()); + auto c_ptr = AsDeviceMemory(filter_backprop->template flat().data(), + filter_backprop->template flat().size()); + + bool blas_launch_status = + stream + ->ThenBlasGemm(perftools::gputools::blas::Transpose::kNoTranspose, + perftools::gputools::blas::Transpose::kTranspose, n, + m, k, 1.0f, b_ptr, n, a_ptr, m, 0.0f, &c_ptr, n) + .ok(); + if (!blas_launch_status) { + ctx->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, + ", n=", n, ", k=", k)); + } + return; + } - CHECK(padding_rows >= 0 && padding_cols >= 0) - << "Negative row or col paddings: (" << padding_rows << ", " - << padding_cols << ")"; - perftools::gputools::dnn::BatchDescriptor input_desc; - input_desc.set_count(dims.batch_size) - .set_height(GetTensorDim(compatible_input, data_format_, 'H')) - .set_width(GetTensorDim(compatible_input, data_format_, 'W')) - .set_feature_map_count(dims.in_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor output_desc; - output_desc.set_count(dims.batch_size) - .set_height(dims.spatial_dims[0].output_size) - .set_width(dims.spatial_dims[1].output_size) - .set_feature_map_count(dims.out_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::FilterDescriptor filter_desc; - filter_desc.set_input_filter_height(dims.spatial_dims[0].filter_size) - .set_input_filter_width(dims.spatial_dims[1].filter_size) - .set_input_feature_map_count(dims.in_depth) - .set_output_feature_map_count(dims.out_depth); - perftools::gputools::dnn::ConvolutionDescriptor conv_desc; - conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride) - .set_horizontal_filter_stride(dims.spatial_dims[1].stride) - .set_zero_padding_height(padding_rows / 2) - .set_zero_padding_width(padding_cols / 2); - - // NOTE(zhengxq): - // cuDNN only supports the following layouts : - // Input : B x D x R x C - // Filter : OD x ID x R x C - // Whereas, we have - // Input : B x R x C x D - // Filter : R x C x ID x OD - // TransformFilter performs (R x C x ID x OD) => (OD x ID x R x C) - // The first TransformDepth performs - // (B x R x C x D) => (B x D x R x C). - // Since the tensor returned from cuDNN is B x D x R x C also, - // the second TransformDepth performs - // (B x D x R x C) => (B x R x C x D). - - Tensor pre_transformed_filter_backprop; - OP_REQUIRES_OK(context, context->allocate_temp( - DataTypeToEnum::value, - TensorShape({dims.out_depth, dims.in_depth, - dims.spatial_dims[0].filter_size, - dims.spatial_dims[1].filter_size}), - &pre_transformed_filter_backprop)); - - Tensor transformed_out_backprop; - if (data_format_ == FORMAT_NHWC) { - TensorShape nchw_shape = ShapeFromFormat( - FORMAT_NCHW, dims.batch_size, dims.spatial_dims[0].output_size, - dims.spatial_dims[1].output_size, dims.out_depth); - if (dims.out_depth > 1) { - OP_REQUIRES_OK(context, context->allocate_temp( - DataTypeToEnum::value, nchw_shape, - &transformed_out_backprop)); - functor::NHWCToNCHW()( - context->eigen_device(), out_backprop.tensor(), - transformed_out_backprop.tensor()); - } else { - // If depth <= 1, just reshape. - CHECK(transformed_out_backprop.CopyFrom(out_backprop, nchw_shape)); - } + Tensor compatible_input; + if (rows_odd || cols_odd) { + // If a padding dimension is odd, we have one more element on the right + // side or the bottom side. This is unsupported in cudnn. Therefore, + // we pad that extra element and make it compatible. + OP_REQUIRES_OK( + ctx, ctx->allocate_temp( + DataTypeToEnum::value, + ShapeFromFormat(data_format, dims.batch_size, + dims.spatial_dims[0].input_size + rows_odd, + dims.spatial_dims[1].input_size + cols_odd, + dims.in_depth), + &compatible_input)); + + functor::PadInput()( + ctx->template eigen_device(), To32Bit(input.tensor()), + {{0, 0}}, {{rows_odd, cols_odd}}, + To32Bit(compatible_input.tensor()), data_format); + } else { + compatible_input = input; + } + + CHECK(padding_rows >= 0 && padding_cols >= 0) + << "Negative row or col paddings: (" << padding_rows << ", " + << padding_cols << ")"; + perftools::gputools::dnn::BatchDescriptor input_desc; + input_desc.set_count(dims.batch_size) + .set_height(GetTensorDim(compatible_input, data_format, 'H')) + .set_width(GetTensorDim(compatible_input, data_format, 'W')) + .set_feature_map_count(dims.in_depth) + .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + perftools::gputools::dnn::BatchDescriptor output_desc; + output_desc.set_count(dims.batch_size) + .set_height(dims.spatial_dims[0].output_size) + .set_width(dims.spatial_dims[1].output_size) + .set_feature_map_count(dims.out_depth) + .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + perftools::gputools::dnn::FilterDescriptor filter_desc; + filter_desc.set_input_filter_height(dims.spatial_dims[0].filter_size) + .set_input_filter_width(dims.spatial_dims[1].filter_size) + .set_input_feature_map_count(dims.in_depth) + .set_output_feature_map_count(dims.out_depth); + perftools::gputools::dnn::ConvolutionDescriptor conv_desc; + conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride) + .set_horizontal_filter_stride(dims.spatial_dims[1].stride) + .set_zero_padding_height(padding_rows / 2) + .set_zero_padding_width(padding_cols / 2); + + // NOTE(zhengxq): + // cuDNN only supports the following layouts : + // Input : B x D x R x C + // Filter : OD x ID x R x C + // Whereas, we have + // Input : B x R x C x D + // Filter : R x C x ID x OD + // TransformFilter performs (R x C x ID x OD) => (OD x ID x R x C) + // The first TransformDepth performs + // (B x R x C x D) => (B x D x R x C). + // Since the tensor returned from cuDNN is B x D x R x C also, + // the second TransformDepth performs + // (B x D x R x C) => (B x R x C x D). + + Tensor pre_transformed_filter_backprop; + OP_REQUIRES_OK( + ctx, ctx->allocate_temp(DataTypeToEnum::value, + TensorShape({dims.out_depth, dims.in_depth, + dims.spatial_dims[0].filter_size, + dims.spatial_dims[1].filter_size}), + &pre_transformed_filter_backprop)); + + Tensor transformed_out_backprop; + if (data_format == FORMAT_NHWC) { + TensorShape nchw_shape = ShapeFromFormat( + FORMAT_NCHW, dims.batch_size, dims.spatial_dims[0].output_size, + dims.spatial_dims[1].output_size, dims.out_depth); + if (dims.out_depth > 1) { + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DataTypeToEnum::value, nchw_shape, + &transformed_out_backprop)); + functor::NHWCToNCHW()( + ctx->eigen_device(), out_backprop.tensor(), + transformed_out_backprop.tensor()); } else { - transformed_out_backprop = out_backprop; + // If depth <= 1, just reshape. + CHECK(transformed_out_backprop.CopyFrom(out_backprop, nchw_shape)); } + } else { + transformed_out_backprop = out_backprop; + } - Tensor transformed_input; - if (data_format_ == FORMAT_NHWC) { - TensorShape nchw_shape = ShapeFromFormat( - FORMAT_NCHW, GetTensorDim(compatible_input, data_format_, 'N'), - GetTensorDim(compatible_input, data_format_, 'H'), - GetTensorDim(compatible_input, data_format_, 'W'), - GetTensorDim(compatible_input, data_format_, 'C')); - if (nchw_shape.dim_size(1) > 1) { - OP_REQUIRES_OK(context, - context->allocate_temp(DataTypeToEnum::value, - nchw_shape, &transformed_input)); - functor::NHWCToNCHW()( - context->eigen_device(), - const_cast(compatible_input).tensor(), - transformed_input.tensor()); - } else { - // If depth <= 1, just reshape. - CHECK(transformed_input.CopyFrom(compatible_input, nchw_shape)); - } + Tensor transformed_input; + if (data_format == FORMAT_NHWC) { + TensorShape nchw_shape = ShapeFromFormat( + FORMAT_NCHW, GetTensorDim(compatible_input, data_format, 'N'), + GetTensorDim(compatible_input, data_format, 'H'), + GetTensorDim(compatible_input, data_format, 'W'), + GetTensorDim(compatible_input, data_format, 'C')); + if (nchw_shape.dim_size(1) > 1) { + OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum::value, + nchw_shape, &transformed_input)); + functor::NHWCToNCHW()( + ctx->eigen_device(), + const_cast(compatible_input).tensor(), + transformed_input.tensor()); } else { - transformed_input = compatible_input; + // If depth <= 1, just reshape. + CHECK(transformed_input.CopyFrom(compatible_input, nchw_shape)); } + } else { + transformed_input = compatible_input; + } - auto out_backprop_ptr = - AsDeviceMemory(transformed_out_backprop.template flat().data(), - transformed_out_backprop.template flat().size()); - auto filter_backprop_ptr = AsDeviceMemory( - pre_transformed_filter_backprop.template flat().data(), - pre_transformed_filter_backprop.template flat().size()); - auto input_ptr = - AsDeviceMemory(transformed_input.template flat().data(), - transformed_input.template flat().size()); - - static int64 ConvolveBackwardFilterScratchSize = GetCudnnWorkspaceLimit( - "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32 // 4GB by default - ); - int device_id = stream->parent()->device_ordinal(); - DataType dtype = input.dtype(); - ConvParameters conv_parameters = { - dims.batch_size, // batch - dims.in_depth, // in_depths - {{input_desc.height(), // in_rows - input_desc.width()}}, // in_cols - dims.out_depth, // out_depths - {{dims.spatial_dims[0].filter_size, // filter_rows - dims.spatial_dims[1].filter_size}}, // filter_cols - {{dims.spatial_dims[0].stride, // stride_rows - dims.spatial_dims[1].stride}}, // stride_cols - {{padding_rows, // padding_rows - padding_cols}}, // padding_cols - dtype, // tensor datatype - device_id, // device_id - }; - AlgorithmConfig algorithm_config; - if (cudnn_use_autotune_ && !AutoTuneConvBwdFilter::GetInstance()->Find( - conv_parameters, &algorithm_config)) { - std::vector algorithms; - CHECK(stream->parent()->GetConvolveBackwardFilterAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); - ProfileResult best_result; - ProfileResult best_result_no_scratch; - for (auto profile_algorithm : algorithms) { - // TODO(zhengxq): profile each algorithm multiple times to better - // accuracy. - CudnnScratchAllocator scratch_allocator( - ConvolveBackwardFilterScratchSize, context); - ProfileResult profile_result; - bool cudnn_launch_status = - stream - ->ThenConvolveBackwardFilterWithAlgorithm( - input_desc, input_ptr, output_desc, out_backprop_ptr, - conv_desc, filter_desc, &filter_backprop_ptr, - &scratch_allocator, AlgorithmConfig(profile_algorithm), - &profile_result) - .ok(); - if (cudnn_launch_status) { - if (profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalByteSize() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_no_scratch.elapsed_time_in_ms()) { - best_result_no_scratch = profile_result; - } + auto out_backprop_ptr = + AsDeviceMemory(transformed_out_backprop.template flat().data(), + transformed_out_backprop.template flat().size()); + auto filter_backprop_ptr = + AsDeviceMemory(pre_transformed_filter_backprop.template flat().data(), + pre_transformed_filter_backprop.template flat().size()); + auto input_ptr = AsDeviceMemory(transformed_input.template flat().data(), + transformed_input.template flat().size()); + + static int64 ConvolveBackwardFilterScratchSize = GetCudnnWorkspaceLimit( + "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32 // 4GB by default + ); + int device_id = stream->parent()->device_ordinal(); + DataType dtype = input.dtype(); + ConvParameters conv_parameters = { + dims.batch_size, // batch + dims.in_depth, // in_depths + {{input_desc.height(), // in_rows + input_desc.width()}}, // in_cols + dims.out_depth, // out_depths + {{dims.spatial_dims[0].filter_size, // filter_rows + dims.spatial_dims[1].filter_size}}, // filter_cols + {{dims.spatial_dims[0].stride, // stride_rows + dims.spatial_dims[1].stride}}, // stride_cols + {{padding_rows, // padding_rows + padding_cols}}, // padding_cols + dtype, // tensor datatype + device_id, // device_id + }; + AlgorithmConfig algorithm_config; + if (cudnn_use_autotune && !AutoTuneConvBwdFilter::GetInstance()->Find( + conv_parameters, &algorithm_config)) { + std::vector algorithms; + CHECK(stream->parent()->GetConvolveBackwardFilterAlgorithms( + conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); + ProfileResult best_result; + ProfileResult best_result_no_scratch; + for (auto profile_algorithm : algorithms) { + // TODO(zhengxq): profile each algorithm multiple times to better + // accuracy. + CudnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize, + ctx); + ProfileResult profile_result; + bool cudnn_launch_status = + stream + ->ThenConvolveBackwardFilterWithAlgorithm( + input_desc, input_ptr, output_desc, out_backprop_ptr, + conv_desc, filter_desc, &filter_backprop_ptr, + &scratch_allocator, AlgorithmConfig(profile_algorithm), + &profile_result) + .ok(); + if (cudnn_launch_status) { + if (profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalByteSize() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_no_scratch.elapsed_time_in_ms()) { + best_result_no_scratch = profile_result; } } } - OP_REQUIRES(context, - best_result.is_valid() || best_result_no_scratch.is_valid(), - errors::NotFound("No algorithm worked!")); - if (best_result.is_valid()) { - algorithm_config.set_algorithm(best_result.algorithm()); - } - if (best_result_no_scratch.is_valid()) { - algorithm_config.set_algorithm_no_scratch( - best_result_no_scratch.algorithm()); - } - AutoTuneConvBwdFilter::GetInstance()->Insert(conv_parameters, - algorithm_config); } - CudnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize, - context); - bool cudnn_launch_status = - stream - ->ThenConvolveBackwardFilterWithAlgorithm( - input_desc, input_ptr, output_desc, out_backprop_ptr, conv_desc, - filter_desc, &filter_backprop_ptr, &scratch_allocator, - algorithm_config, nullptr) - .ok(); - - if (!cudnn_launch_status) { - context->SetStatus(errors::Internal( - "cuDNN Backward Filter function launch failure : input shape(", - input_shape.DebugString(), ") filter shape(", - filter_shape.DebugString(), ")")); - return; + OP_REQUIRES(ctx, + best_result.is_valid() || best_result_no_scratch.is_valid(), + errors::NotFound("No algorithm worked!")); + if (best_result.is_valid()) { + algorithm_config.set_algorithm(best_result.algorithm()); } - - auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; }; - functor::ReverseTransformFilter()( - context->eigen_device(), - toConstTensor(pre_transformed_filter_backprop).template tensor(), - filter_backprop->tensor()); + if (best_result_no_scratch.is_valid()) { + algorithm_config.set_algorithm_no_scratch( + best_result_no_scratch.algorithm()); + } + AutoTuneConvBwdFilter::GetInstance()->Insert(conv_parameters, + algorithm_config); + } + CudnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize, + ctx); + bool cudnn_launch_status = + stream + ->ThenConvolveBackwardFilterWithAlgorithm( + input_desc, input_ptr, output_desc, out_backprop_ptr, conv_desc, + filter_desc, &filter_backprop_ptr, &scratch_allocator, + algorithm_config, nullptr) + .ok(); + + if (!cudnn_launch_status) { + ctx->SetStatus(errors::Internal( + "cuDNN Backward Filter function launch failure : input shape(", + input.shape().DebugString(), ") filter shape(", + filter_shape.DebugString(), ")")); + return; } - private: - std::vector strides_; - Padding padding_; - bool use_cudnn_; - TensorFormat data_format_; - bool cudnn_use_autotune_; - bool cudnn_disable_conv_1x1_optimization_; - - TF_DISALLOW_COPY_AND_ASSIGN(Conv2DSlowBackpropFilterOp); -}; + auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; }; + functor::ReverseTransformFilter()( + ctx->eigen_device(), + toConstTensor(pre_transformed_filter_backprop).template tensor(), + filter_backprop->tensor()); +} // Forward declarations of the functor specializations for GPU. namespace functor { diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index a5a9549a2f..ce561aa99c 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -97,29 +97,17 @@ typedef Eigen::GpuDevice GPUDevice; // for CPU for now since nvcc times out when trying to compile them. // TODO(yangke): enable them for GPUs when we have a faster compiler. -template -struct LaunchBackwardInputConvolution { - bool operator()(OpKernelContext* context, const Device&, - typename TTypes::Tensor, - typename TTypes::ConstTensor, - typename TTypes::ConstTensor, int, int, int, int, - TensorFormat) const { - return false; - } -}; - -template <> -struct LaunchBackwardInputConvolution { - bool operator()(OpKernelContext* context, const CPUDevice& d, - typename TTypes::Tensor input_backward, - typename TTypes::ConstTensor kernel, - typename TTypes::ConstTensor output_backward, - int input_rows, int input_cols, int row_stride, - int col_stride, TensorFormat data_format) const { - functor::SpatialConvolutionBackwardInput()( - d, input_backward, kernel, output_backward, input_rows, input_cols, - row_stride, col_stride); - return true; +template +struct LaunchConv2DBackpropInputOp { + void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& out_backprop, const Tensor& filter, + int row_stride, int col_stride, const Padding& padding, + Tensor* in_backprop, TensorFormat data_format) { + const CPUDevice& d = ctx->eigen_device(); + functor::SpatialConvolutionBackwardInput()( + d, in_backprop->tensor(), filter.tensor(), + out_backprop.tensor(), in_backprop->dim_size(1), + in_backprop->dim_size(2), row_stride, col_stride); } }; @@ -268,11 +256,10 @@ class Conv2DFastBackpropInputOp : public OpKernel { } #endif - LaunchBackwardInputConvolution()( - context, context->eigen_device(), in_backprop->tensor(), - filter.tensor(), out_backprop.tensor(), - dims.spatial_dims[0].input_size, dims.spatial_dims[1].input_size, - dims.spatial_dims[0].stride, dims.spatial_dims[1].stride, data_format_); + LaunchConv2DBackpropInputOp()( + context, false, false, out_backprop, filter, + dims.spatial_dims[0].stride, dims.spatial_dims[1].stride, padding_, + in_backprop, data_format_); } private: @@ -600,10 +587,6 @@ class Conv2DSlowBackpropInputOp : public OpKernel { } void Compute(OpKernelContext* context) override { - using perftools::gputools::dnn::AlgorithmConfig; - using perftools::gputools::dnn::AlgorithmType; - using perftools::gputools::dnn::ProfileResult; - using perftools::gputools::dnn::kDefaultAlgorithm; const Tensor& input_sizes = context->input(0); const Tensor& filter = context->input(1); const Tensor& out_backprop = context->input(2); @@ -615,351 +598,372 @@ class Conv2DSlowBackpropInputOp : public OpKernel { TensorShape input_shape; OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( input_sizes.vec(), &input_shape)); - const TensorShape& filter_shape = filter.shape(); - - ConvBackpropDimensions dims; - OP_REQUIRES_OK( - context, ConvBackpropComputeDimensions( - "Conv2DSlowBackpropInput", /*num_spatial_dims=*/2, - input_shape, filter_shape, out_backprop.shape(), strides_, - padding_, data_format_, &dims)); Tensor* in_backprop = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input_shape, &in_backprop)); - const int padding_rows = - (padding_ == VALID) - ? 0 - : std::max(0, (dims.spatial_dims[0].output_size - 1) * - dims.spatial_dims[0].stride + - dims.spatial_dims[0].filter_size - - dims.spatial_dims[0].input_size); - const int padding_cols = - (padding_ == VALID) - ? 0 - : std::max(0, (dims.spatial_dims[1].output_size - 1) * - dims.spatial_dims[1].stride + - dims.spatial_dims[1].filter_size - - dims.spatial_dims[1].input_size); - - // TODO(keveman): cuDNN only supports equal padding on both sides, so only - // calling it when that is true. Remove this check when (if?) cuDNN starts - // supporting different padding. - bool rows_odd = (padding_rows % 2 != 0); - bool cols_odd = (padding_cols % 2 != 0); - - auto* stream = context->op_device_context()->stream(); - OP_REQUIRES(context, stream, errors::Internal("No GPU stream available.")); - - if (!use_cudnn_) { - context->SetStatus(errors::Unimplemented( - "Conv2DBackpropInput for GPU is not currently supported " - "without cudnn")); - return; - } + // For now we take the stride from the second and third dimensions only (we + // do not support striding on the batch or depth dimension). + const int stride_rows = GetTensorDim(strides_, data_format_, 'H'); + const int stride_cols = GetTensorDim(strides_, data_format_, 'W'); - if (dims.spatial_dims[0].filter_size == 1 && - dims.spatial_dims[1].filter_size == 1 && - dims.spatial_dims[0].stride == 1 && dims.spatial_dims[1].stride == 1 && - data_format_ == FORMAT_NHWC) { - // 1x1 filter, so call cublas directly. - const uint64 m = dims.batch_size * dims.spatial_dims[0].input_size * - dims.spatial_dims[1].input_size; - const uint64 k = dims.out_depth; - const uint64 n = dims.in_depth; - - auto a_ptr = AsDeviceMemory(out_backprop.template flat().data(), - out_backprop.template flat().size()); - auto b_ptr = AsDeviceMemory(filter.template flat().data(), - filter.template flat().size()); - auto c_ptr = AsDeviceMemory(in_backprop->template flat().data(), - in_backprop->template flat().size()); - - auto transpose = perftools::gputools::blas::Transpose::kTranspose; - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; - - bool blas_launch_status = - stream - ->ThenBlasGemm(transpose, no_transpose, n, m, k, 1.0f, b_ptr, k, - a_ptr, k, 0.0f, &c_ptr, n) - .ok(); - if (!blas_launch_status) { - context->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, - ", n=", n, ", k=", k)); - } - return; - } else if (dims.spatial_dims[0].filter_size == - dims.spatial_dims[0].input_size && - dims.spatial_dims[1].filter_size == - dims.spatial_dims[1].input_size && - padding_ == VALID && data_format_ == FORMAT_NHWC) { - // The input data and filter have the same height/width, so call cublas - // directly. - const uint64 m = dims.batch_size; - const uint64 k = dims.out_depth; - const uint64 n = dims.spatial_dims[0].input_size * - dims.spatial_dims[1].input_size * dims.in_depth; - - auto a_ptr = AsDeviceMemory(out_backprop.template flat().data(), - out_backprop.template flat().size()); - auto b_ptr = AsDeviceMemory(filter.template flat().data(), - filter.template flat().size()); - auto c_ptr = AsDeviceMemory(in_backprop->template flat().data(), - in_backprop->template flat().size()); - - auto transpose = perftools::gputools::blas::Transpose::kTranspose; - auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; - - bool blas_launch_status = - stream - ->ThenBlasGemm(transpose, no_transpose, n, m, k, 1.0f, b_ptr, k, - a_ptr, k, 0.0f, &c_ptr, n) - .ok(); - if (!blas_launch_status) { - context->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, - ", n=", n, ", k=", k)); - } - return; - } + launcher_(context, use_cudnn_, cudnn_use_autotune_, out_backprop, filter, + stride_rows, stride_cols, padding_, in_backprop, data_format_); + } - TensorShape compatible_input_shape; - if (rows_odd || cols_odd) { - // If a padding dimension is odd, we have one more element on the right - // side or the bottom side. This is unsupported in cudnn. Therefore, - // we pad that extra element and make it compatible. - compatible_input_shape = ShapeFromFormat( - data_format_, dims.batch_size, - dims.spatial_dims[0].input_size + rows_odd, - dims.spatial_dims[1].input_size + cols_odd, dims.in_depth); - } else { - compatible_input_shape = input_shape; + private: + std::vector strides_; + Padding padding_; + bool use_cudnn_; + TensorFormat data_format_; + LaunchConv2DBackpropInputOp launcher_; + bool cudnn_use_autotune_; + + TF_DISALLOW_COPY_AND_ASSIGN(Conv2DSlowBackpropInputOp); +}; + +template +void LaunchConv2DBackpropInputOp::operator()( + OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& out_backprop, const Tensor& filter, int row_stride, + int col_stride, const Padding& padding, Tensor* in_backprop, + TensorFormat data_format) { + using perftools::gputools::dnn::AlgorithmConfig; + using perftools::gputools::dnn::AlgorithmType; + using perftools::gputools::dnn::ProfileResult; + + std::vector strides(4, 1); + strides[GetTensorDimIndex(data_format, 'H')] = row_stride; + strides[GetTensorDimIndex(data_format, 'W')] = col_stride; + TensorShape input_shape = in_backprop->shape(); + + const TensorShape& filter_shape = filter.shape(); + ConvBackpropDimensions dims; + OP_REQUIRES_OK(ctx, ConvBackpropComputeDimensions( + "Conv2DSlowBackpropInput", /*num_spatial_dims=*/2, + input_shape, filter_shape, out_backprop.shape(), + strides, padding, data_format, &dims)); + + const int padding_rows = + (padding == VALID) + ? 0 + : std::max(0, (dims.spatial_dims[0].output_size - 1) * + dims.spatial_dims[0].stride + + dims.spatial_dims[0].filter_size - + dims.spatial_dims[0].input_size); + const int padding_cols = + (padding == VALID) + ? 0 + : std::max(0, (dims.spatial_dims[1].output_size - 1) * + dims.spatial_dims[1].stride + + dims.spatial_dims[1].filter_size - + dims.spatial_dims[1].input_size); + + // TODO(keveman): cuDNN only supports equal padding on both sides, so only + // calling it when that is true. Remove this check when (if?) cuDNN starts + // supporting different padding. + bool rows_odd = (padding_rows % 2 != 0); + bool cols_odd = (padding_cols % 2 != 0); + + auto* stream = ctx->op_device_context()->stream(); + OP_REQUIRES(ctx, stream, errors::Internal("No GPU stream available.")); + + if (!use_cudnn) { + ctx->SetStatus(errors::Unimplemented( + "Conv2DBackpropInput for GPU is not currently supported " + "without cudnn")); + return; + } + + if (dims.spatial_dims[0].filter_size == 1 && + dims.spatial_dims[1].filter_size == 1 && + dims.spatial_dims[0].stride == 1 && dims.spatial_dims[1].stride == 1 && + data_format == FORMAT_NHWC) { + // 1x1 filter, so call cublas directly. + const uint64 m = dims.batch_size * dims.spatial_dims[0].input_size * + dims.spatial_dims[1].input_size; + const uint64 k = dims.out_depth; + const uint64 n = dims.in_depth; + + auto a_ptr = AsDeviceMemory(out_backprop.template flat().data(), + out_backprop.template flat().size()); + auto b_ptr = AsDeviceMemory(filter.template flat().data(), + filter.template flat().size()); + auto c_ptr = AsDeviceMemory(in_backprop->template flat().data(), + in_backprop->template flat().size()); + + auto transpose = perftools::gputools::blas::Transpose::kTranspose; + auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + + bool blas_launch_status = + stream + ->ThenBlasGemm(transpose, no_transpose, n, m, k, 1.0f, b_ptr, k, + a_ptr, k, 0.0f, &c_ptr, n) + .ok(); + if (!blas_launch_status) { + ctx->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, + ", n=", n, ", k=", k)); } + return; + } else if (dims.spatial_dims[0].filter_size == + dims.spatial_dims[0].input_size && + dims.spatial_dims[1].filter_size == + dims.spatial_dims[1].input_size && + padding == VALID && data_format == FORMAT_NHWC) { + // The input data and filter have the same height/width, so call cublas + // directly. + const uint64 m = dims.batch_size; + const uint64 k = dims.out_depth; + const uint64 n = dims.spatial_dims[0].input_size * + dims.spatial_dims[1].input_size * dims.in_depth; + + auto a_ptr = AsDeviceMemory(out_backprop.template flat().data(), + out_backprop.template flat().size()); + auto b_ptr = AsDeviceMemory(filter.template flat().data(), + filter.template flat().size()); + auto c_ptr = AsDeviceMemory(in_backprop->template flat().data(), + in_backprop->template flat().size()); + + auto transpose = perftools::gputools::blas::Transpose::kTranspose; + auto no_transpose = perftools::gputools::blas::Transpose::kNoTranspose; + + bool blas_launch_status = + stream + ->ThenBlasGemm(transpose, no_transpose, n, m, k, 1.0f, b_ptr, k, + a_ptr, k, 0.0f, &c_ptr, n) + .ok(); + if (!blas_launch_status) { + ctx->SetStatus(errors::Internal("Blas SGEMM launch failed : m=", m, + ", n=", n, ", k=", k)); + } + return; + } - CHECK(padding_rows >= 0 && padding_cols >= 0) - << "Negative row or col paddings: (" << padding_rows << ", " - << padding_cols << ")"; - perftools::gputools::dnn::BatchDescriptor input_desc; - input_desc.set_count(dims.batch_size) - .set_height(GetTensorDim(compatible_input_shape, data_format_, 'H')) - .set_width(GetTensorDim(compatible_input_shape, data_format_, 'W')) - .set_feature_map_count(dims.in_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::BatchDescriptor output_desc; - output_desc.set_count(dims.batch_size) - .set_height(dims.spatial_dims[0].output_size) - .set_width(dims.spatial_dims[1].output_size) - .set_feature_map_count(dims.out_depth) - .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); - perftools::gputools::dnn::FilterDescriptor filter_desc; - filter_desc.set_input_filter_height(dims.spatial_dims[0].filter_size) - .set_input_filter_width(dims.spatial_dims[1].filter_size) - .set_input_feature_map_count(dims.in_depth) - .set_output_feature_map_count(dims.out_depth); - perftools::gputools::dnn::ConvolutionDescriptor conv_desc; - conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride) - .set_horizontal_filter_stride(dims.spatial_dims[1].stride) - .set_zero_padding_height(padding_rows / 2) - .set_zero_padding_width(padding_cols / 2); - - // NOTE(keveman): - // cuDNN only supports the following layouts : - // Input : B x D x R x C - // Filter : OD x ID x R x C - // Whereas, we have - // Input : B x R x C x D - // Filter : R x C x ID x OD - // TransformFilter performs (R x C x ID x OD) => (OD x ID x R x C) - // The first TransformDepth performs - // (B x R x C x D) => (B x D x R x C). - // Since the tensor returned from cuDNN is B x D x R x C also, - // the second TransformDepth performs - // (B x D x R x C) => (B x R x C x D). - Tensor transformed_filter; - OP_REQUIRES_OK(context, context->allocate_temp( - DataTypeToEnum::value, - TensorShape({dims.out_depth, dims.in_depth, - dims.spatial_dims[0].filter_size, - dims.spatial_dims[1].filter_size}), - &transformed_filter)); - - functor::TransformFilter()( - context->eigen_device(), To32Bit(filter.tensor()), - To32Bit(transformed_filter.tensor())); - - Tensor transformed_out_backprop; - if (data_format_ == FORMAT_NHWC) { - TensorShape nchw_shape = ShapeFromFormat( - FORMAT_NCHW, dims.batch_size, dims.spatial_dims[0].output_size, - dims.spatial_dims[1].output_size, dims.out_depth); - if (dims.out_depth > 1) { - OP_REQUIRES_OK(context, context->allocate_temp( - DataTypeToEnum::value, nchw_shape, - &transformed_out_backprop)); - functor::NHWCToNCHW()( - context->eigen_device(), out_backprop.tensor(), - transformed_out_backprop.tensor()); - } else { - // If depth <= 1, then just reshape. - CHECK(transformed_out_backprop.CopyFrom(out_backprop, nchw_shape)); - } + TensorShape compatible_input_shape; + if (rows_odd || cols_odd) { + // If a padding dimension is odd, we have one more element on the right + // side or the bottom side. This is unsupported in cudnn. Therefore, + // we pad that extra element and make it compatible. + compatible_input_shape = ShapeFromFormat( + data_format, dims.batch_size, + dims.spatial_dims[0].input_size + rows_odd, + dims.spatial_dims[1].input_size + cols_odd, dims.in_depth); + } else { + compatible_input_shape = input_shape; + } + + CHECK(padding_rows >= 0 && padding_cols >= 0) + << "Negative row or col paddings: (" << padding_rows << ", " + << padding_cols << ")"; + perftools::gputools::dnn::BatchDescriptor input_desc; + input_desc.set_count(dims.batch_size) + .set_height(GetTensorDim(compatible_input_shape, data_format, 'H')) + .set_width(GetTensorDim(compatible_input_shape, data_format, 'W')) + .set_feature_map_count(dims.in_depth) + .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + perftools::gputools::dnn::BatchDescriptor output_desc; + output_desc.set_count(dims.batch_size) + .set_height(dims.spatial_dims[0].output_size) + .set_width(dims.spatial_dims[1].output_size) + .set_feature_map_count(dims.out_depth) + .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX); + perftools::gputools::dnn::FilterDescriptor filter_desc; + filter_desc.set_input_filter_height(dims.spatial_dims[0].filter_size) + .set_input_filter_width(dims.spatial_dims[1].filter_size) + .set_input_feature_map_count(dims.in_depth) + .set_output_feature_map_count(dims.out_depth); + perftools::gputools::dnn::ConvolutionDescriptor conv_desc; + conv_desc.set_vertical_filter_stride(dims.spatial_dims[0].stride) + .set_horizontal_filter_stride(dims.spatial_dims[1].stride) + .set_zero_padding_height(padding_rows / 2) + .set_zero_padding_width(padding_cols / 2); + + // NOTE(keveman): + // cuDNN only supports the following layouts : + // Input : B x D x R x C + // Filter : OD x ID x R x C + // Whereas, we have + // Input : B x R x C x D + // Filter : R x C x ID x OD + // TransformFilter performs (R x C x ID x OD) => (OD x ID x R x C) + // The first TransformDepth performs + // (B x R x C x D) => (B x D x R x C). + // Since the tensor returned from cuDNN is B x D x R x C also, + // the second TransformDepth performs + // (B x D x R x C) => (B x R x C x D). + Tensor transformed_filter; + OP_REQUIRES_OK( + ctx, ctx->allocate_temp(DataTypeToEnum::value, + TensorShape({dims.out_depth, dims.in_depth, + dims.spatial_dims[0].filter_size, + dims.spatial_dims[1].filter_size}), + &transformed_filter)); + + functor::TransformFilter()( + ctx->eigen_device(), To32Bit(filter.tensor()), + To32Bit(transformed_filter.tensor())); + + Tensor transformed_out_backprop; + if (data_format == FORMAT_NHWC) { + TensorShape nchw_shape = ShapeFromFormat( + FORMAT_NCHW, dims.batch_size, dims.spatial_dims[0].output_size, + dims.spatial_dims[1].output_size, dims.out_depth); + if (dims.out_depth > 1) { + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DataTypeToEnum::value, nchw_shape, + &transformed_out_backprop)); + functor::NHWCToNCHW()( + ctx->eigen_device(), out_backprop.tensor(), + transformed_out_backprop.tensor()); } else { - transformed_out_backprop = out_backprop; + // If depth <= 1, then just reshape. + CHECK(transformed_out_backprop.CopyFrom(out_backprop, nchw_shape)); } + } else { + transformed_out_backprop = out_backprop; + } - Tensor pre_transformed_in_backprop; - OP_REQUIRES_OK( - context, - context->allocate_temp( - DataTypeToEnum::value, - ShapeFromFormat( - FORMAT_NCHW, - GetTensorDim(compatible_input_shape, data_format_, 'N'), - GetTensorDim(compatible_input_shape, data_format_, 'H'), - GetTensorDim(compatible_input_shape, data_format_, 'W'), - GetTensorDim(compatible_input_shape, data_format_, 'C')), - &pre_transformed_in_backprop)); - - auto out_backprop_ptr = - AsDeviceMemory(transformed_out_backprop.template flat().data(), - transformed_out_backprop.template flat().size()); - auto filter_ptr = - AsDeviceMemory(transformed_filter.template flat().data(), - transformed_filter.template flat().size()); - auto in_backprop_ptr = - AsDeviceMemory(pre_transformed_in_backprop.template flat().data(), - pre_transformed_in_backprop.template flat().size()); - - static int64 ConvolveBackwardDataScratchSize = GetCudnnWorkspaceLimit( - "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32 // 4GB by default - ); - CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, - context); - int device_id = stream->parent()->device_ordinal(); - DataType dtype = out_backprop.dtype(); - ConvParameters conv_parameters = { - dims.batch_size, // batch - dims.in_depth, // in_depths - {{input_desc.height(), // in_rows - input_desc.width()}}, // in_cols - dims.out_depth, // out_depths - {{dims.spatial_dims[0].filter_size, // filter_rows - dims.spatial_dims[1].filter_size}}, // filter_cols - {{dims.spatial_dims[0].stride, // stride_rows - dims.spatial_dims[1].stride}}, // stride_cols - {{padding_rows, // padding_rows - padding_cols}}, // padding_cols - dtype, // tensor data type - device_id, // device_id - }; - AlgorithmConfig algorithm_config; - if (cudnn_use_autotune_ && !AutoTuneConvBwdData::GetInstance()->Find( - conv_parameters, &algorithm_config)) { - std::vector algorithms; - CHECK(stream->parent()->GetConvolveBackwardDataAlgorithms( - conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); - ProfileResult best_result; - ProfileResult best_result_no_scratch; - for (auto profile_algorithm : algorithms) { - // TODO(zhengxq): profile each algorithm multiple times to better - // accuracy. - CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, - context); - ProfileResult profile_result; - bool cudnn_launch_status = - stream - ->ThenConvolveBackwardDataWithAlgorithm( - filter_desc, filter_ptr, output_desc, out_backprop_ptr, - conv_desc, input_desc, &in_backprop_ptr, &scratch_allocator, - AlgorithmConfig(profile_algorithm), &profile_result) - .ok(); - if (cudnn_launch_status) { - if (profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalByteSize() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_no_scratch.elapsed_time_in_ms()) { - best_result_no_scratch = profile_result; - } + Tensor pre_transformed_in_backprop; + OP_REQUIRES_OK( + ctx, ctx->allocate_temp( + DataTypeToEnum::value, + ShapeFromFormat( + FORMAT_NCHW, + GetTensorDim(compatible_input_shape, data_format, 'N'), + GetTensorDim(compatible_input_shape, data_format, 'H'), + GetTensorDim(compatible_input_shape, data_format, 'W'), + GetTensorDim(compatible_input_shape, data_format, 'C')), + &pre_transformed_in_backprop)); + + auto out_backprop_ptr = + AsDeviceMemory(transformed_out_backprop.template flat().data(), + transformed_out_backprop.template flat().size()); + auto filter_ptr = + AsDeviceMemory(transformed_filter.template flat().data(), + transformed_filter.template flat().size()); + auto in_backprop_ptr = + AsDeviceMemory(pre_transformed_in_backprop.template flat().data(), + pre_transformed_in_backprop.template flat().size()); + + static int64 ConvolveBackwardDataScratchSize = GetCudnnWorkspaceLimit( + "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32 // 4GB by default + ); + CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, ctx); + int device_id = stream->parent()->device_ordinal(); + DataType dtype = out_backprop.dtype(); + ConvParameters conv_parameters = { + dims.batch_size, // batch + dims.in_depth, // in_depths + {{input_desc.height(), // in_rows + input_desc.width()}}, // in_cols + dims.out_depth, // out_depths + {{dims.spatial_dims[0].filter_size, // filter_rows + dims.spatial_dims[1].filter_size}}, // filter_cols + {{dims.spatial_dims[0].stride, // stride_rows + dims.spatial_dims[1].stride}}, // stride_cols + {{padding_rows, // padding_rows + padding_cols}}, // padding_cols + dtype, // tensor data type + device_id, // device_id + }; + AlgorithmConfig algorithm_config; + if (cudnn_use_autotune && !AutoTuneConvBwdData::GetInstance()->Find( + conv_parameters, &algorithm_config)) { + std::vector algorithms; + CHECK(stream->parent()->GetConvolveBackwardDataAlgorithms( + conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); + ProfileResult best_result; + ProfileResult best_result_no_scratch; + for (auto profile_algorithm : algorithms) { + // TODO(zhengxq): profile each algorithm multiple times to better + // accuracy. + CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, + ctx); + ProfileResult profile_result; + bool cudnn_launch_status = + stream + ->ThenConvolveBackwardDataWithAlgorithm( + filter_desc, filter_ptr, output_desc, out_backprop_ptr, + conv_desc, input_desc, &in_backprop_ptr, &scratch_allocator, + AlgorithmConfig(profile_algorithm), &profile_result) + .ok(); + if (cudnn_launch_status) { + if (profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalByteSize() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_no_scratch.elapsed_time_in_ms()) { + best_result_no_scratch = profile_result; } } } - OP_REQUIRES(context, - best_result.is_valid() || best_result_no_scratch.is_valid(), - errors::NotFound("No algorithm worked!")); - if (best_result.is_valid()) { - algorithm_config.set_algorithm(best_result.algorithm()); - } - if (best_result_no_scratch.is_valid()) { - algorithm_config.set_algorithm_no_scratch( - best_result_no_scratch.algorithm()); - } - AutoTuneConvBwdData::GetInstance()->Insert(conv_parameters, - algorithm_config); - } - bool cudnn_launch_status = - stream - ->ThenConvolveBackwardDataWithAlgorithm( - filter_desc, filter_ptr, output_desc, out_backprop_ptr, - conv_desc, input_desc, &in_backprop_ptr, &scratch_allocator, - algorithm_config, nullptr) - .ok(); - - if (!cudnn_launch_status) { - context->SetStatus(errors::Internal( - "cuDNN Backward Data function launch failure : input shape(", - input_shape.DebugString(), ") filter shape(", - filter_shape.DebugString(), ")")); - return; } - - if (rows_odd || cols_odd) { - Tensor in_backprop_remove_padding; - OP_REQUIRES_OK( - context, - context->allocate_temp( - DataTypeToEnum::value, - ShapeFromFormat(FORMAT_NCHW, - GetTensorDim(input_shape, data_format_, 'N'), - GetTensorDim(input_shape, data_format_, 'H'), - GetTensorDim(input_shape, data_format_, 'W'), - GetTensorDim(input_shape, data_format_, 'C')), - &in_backprop_remove_padding)); - - // Remove the padding for odd rows or cols. - functor::PadInput()( - context->template eigen_device(), - To32Bit(const_cast(pre_transformed_in_backprop) - .tensor()), - {{0, 0}}, {{-rows_odd, -cols_odd}}, - To32Bit(in_backprop_remove_padding.tensor()), FORMAT_NCHW); - - pre_transformed_in_backprop = in_backprop_remove_padding; + OP_REQUIRES(ctx, + best_result.is_valid() || best_result_no_scratch.is_valid(), + errors::NotFound("No algorithm worked!")); + if (best_result.is_valid()) { + algorithm_config.set_algorithm(best_result.algorithm()); } - - if (data_format_ == FORMAT_NHWC) { - auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; }; - functor::NCHWToNHWC()( - context->eigen_device(), - toConstTensor(pre_transformed_in_backprop).template tensor(), - in_backprop->tensor()); - } else { - *in_backprop = pre_transformed_in_backprop; + if (best_result_no_scratch.is_valid()) { + algorithm_config.set_algorithm_no_scratch( + best_result_no_scratch.algorithm()); } + AutoTuneConvBwdData::GetInstance()->Insert(conv_parameters, + algorithm_config); + } + bool cudnn_launch_status = + stream + ->ThenConvolveBackwardDataWithAlgorithm( + filter_desc, filter_ptr, output_desc, out_backprop_ptr, conv_desc, + input_desc, &in_backprop_ptr, &scratch_allocator, + algorithm_config, nullptr) + .ok(); + + if (!cudnn_launch_status) { + ctx->SetStatus(errors::Internal( + "cuDNN Backward Data function launch failure : input shape(", + input_shape.DebugString(), ") filter shape(", + filter_shape.DebugString(), ")")); + return; } - private: - std::vector strides_; - Padding padding_; - bool use_cudnn_; - TensorFormat data_format_; - bool cudnn_use_autotune_; + if (rows_odd || cols_odd) { + Tensor in_backprop_remove_padding; + OP_REQUIRES_OK( + ctx, ctx->allocate_temp( + DataTypeToEnum::value, + ShapeFromFormat(FORMAT_NCHW, + GetTensorDim(input_shape, data_format, 'N'), + GetTensorDim(input_shape, data_format, 'H'), + GetTensorDim(input_shape, data_format, 'W'), + GetTensorDim(input_shape, data_format, 'C')), + &in_backprop_remove_padding)); + + // Remove the padding for odd rows or cols. + functor::PadInput()( + ctx->template eigen_device(), + To32Bit(const_cast(pre_transformed_in_backprop) + .tensor()), + {{0, 0}}, {{-rows_odd, -cols_odd}}, + To32Bit(in_backprop_remove_padding.tensor()), FORMAT_NCHW); + + pre_transformed_in_backprop = in_backprop_remove_padding; + } - TF_DISALLOW_COPY_AND_ASSIGN(Conv2DSlowBackpropInputOp); -}; + if (data_format == FORMAT_NHWC) { + auto toConstTensor = [](const Tensor& x) -> const Tensor { return x; }; + functor::NCHWToNHWC()( + ctx->eigen_device(), + toConstTensor(pre_transformed_in_backprop).template tensor(), + in_backprop->tensor()); + } else { + *in_backprop = pre_transformed_in_backprop; + } +} // Forward declarations of the functor specializations for GPU. namespace functor { diff --git a/tensorflow/core/kernels/conv_grad_ops.h b/tensorflow/core/kernels/conv_grad_ops.h index 3ea9510afb..2926bb3a86 100644 --- a/tensorflow/core/kernels/conv_grad_ops.h +++ b/tensorflow/core/kernels/conv_grad_ops.h @@ -168,6 +168,43 @@ limitations under the License. namespace tensorflow { +// Forward declaration. +class OpKernelContext; + +template +struct LaunchConv2DBackpropInputOp { + void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& out_backprop, const Tensor& filter, + int row_stride, int col_stride, const Padding& padding, + Tensor* in_backprop, TensorFormat data_format); +}; + +template +struct LaunchConv2DBackpropFilterOp { + void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& out_backprop, const Tensor& input, + int row_stride, int col_stride, const Padding& padding, + Tensor* filter_backprop, TensorFormat data_format); +}; + +#ifdef GOOGLE_CUDA +template +struct LaunchConv2DBackpropInputOp { + void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& input, const Tensor& filter, int row_stride, + int col_stride, const Padding& padding, Tensor* output, + TensorFormat data_format); +}; + +template +struct LaunchConv2DBackpropFilterOp { + void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& out_backprop, const Tensor& input, + int row_stride, int col_stride, const Padding& padding, + Tensor* filter_backprop, TensorFormat data_format); +}; +#endif // GOOGLE_CUDA + // Information about a single spatial dimension for a convolution // backpropagation. struct ConvBackpropSpatialDimension { diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index 2c77a38952..9de8642d41 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -58,10 +58,10 @@ typedef Eigen::GpuDevice GPUDevice; namespace { template struct LaunchGeneric { - static void launch(OpKernelContext* ctx, const Tensor& input, - const Tensor& filter, int row_stride, int col_stride, - const Eigen::PaddingType& padding, Tensor* output, - TensorFormat data_format) { + void operator()(OpKernelContext* ctx, const Tensor& input, + const Tensor& filter, int row_stride, int col_stride, + const Padding& padding, Tensor* output, + TensorFormat data_format) { CHECK(data_format == FORMAT_NHWC) << "Generic conv implementation only " "supports NHWC tensor format for now."; if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && row_stride == 1 && @@ -86,8 +86,7 @@ struct LaunchGeneric { filter.shaped({filter.dim_size(2), filter.dim_size(3)}), dim_pair); } else if (filter.dim_size(0) == input.dim_size(1) && - filter.dim_size(1) == input.dim_size(2) && - padding == Eigen::PADDING_VALID) { + filter.dim_size(1) == input.dim_size(2) && padding == VALID) { // If the input data and filter have the same height/width, // the 2D convolution is reduced to matrix multiplication. const int k = // Length of reduction dimension. @@ -104,28 +103,26 @@ struct LaunchGeneric { functor::SpatialConvolution()( ctx->eigen_device(), output->tensor(), input.tensor(), filter.tensor(), row_stride, col_stride, - padding); + BrainPadding2EigenPadding(padding)); } } }; } // namespace template -class LaunchConv2DOp { - public: - void launch(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& input, const Tensor& filter, int row_stride, - int col_stride, const Eigen::PaddingType& padding, Tensor* output, - TensorFormat data_format) { +struct LaunchConv2DOp { + void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& input, const Tensor& filter, int row_stride, + int col_stride, const Padding& padding, Tensor* output, + TensorFormat data_format) { if (data_format != FORMAT_NHWC) { ctx->SetStatus( errors::Unimplemented("Generic conv implementation only supports " "NHWC tensor format for now.")); return; } - LaunchGeneric::launch(ctx, input, filter, row_stride, - col_stride, padding, output, - data_format); + LaunchGeneric()(ctx, input, filter, row_stride, col_stride, + padding, output, data_format); } }; @@ -387,9 +384,8 @@ class Conv2DOp : public BinaryOp { return; } - launcher_.launch(context, use_cudnn_, cudnn_use_autotune_, input, filter, - stride_rows, stride_cols, - BrainPadding2EigenPadding(padding_), output, data_format_); + launcher_(context, use_cudnn_, cudnn_use_autotune_, input, filter, + stride_rows, stride_cols, padding_, output, data_format_); } private: @@ -445,10 +441,10 @@ typedef AutoTuneSingleton -void LaunchConv2DOp::launch( +void LaunchConv2DOp::operator()( OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, const Tensor& input_param, const Tensor& filter, int row_stride, - int col_stride, const Eigen::PaddingType& padding, Tensor* output, + int col_stride, const Padding& padding, Tensor* output, TensorFormat data_format) { using perftools::gputools::dnn::AlgorithmConfig; using perftools::gputools::dnn::AlgorithmType; @@ -492,8 +488,8 @@ void LaunchConv2DOp::launch( } return; } else if (filter.dim_size(0) == input.dim_size(1) && - filter.dim_size(1) == input.dim_size(2) && - padding == Eigen::PADDING_VALID && data_format == FORMAT_NHWC) { + filter.dim_size(1) == input.dim_size(2) && padding == VALID && + data_format == FORMAT_NHWC) { // The input data and filter have the same height/width, so call cublas // directly. const uint64 m = input.dim_size(0); diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h index 60091fc27f..e29271dff2 100644 --- a/tensorflow/core/kernels/conv_ops.h +++ b/tensorflow/core/kernels/conv_ops.h @@ -32,14 +32,23 @@ namespace tensorflow { class OpKernelContext; template -class LaunchConv2DOp { - public: - void launch(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& input, const Tensor& filter, int row_stride, - int col_stride, const Eigen::PaddingType& padding, Tensor* output, - TensorFormat data_format); +struct LaunchConv2DOp { + void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& input, const Tensor& filter, int row_stride, + int col_stride, const Padding& padding, Tensor* output, + TensorFormat data_format); }; +#ifdef GOOGLE_CUDA +template +struct LaunchConv2DOp { + void operator()(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, + const Tensor& input, const Tensor& filter, int row_stride, + int col_stride, const Padding& padding, Tensor* output, + TensorFormat data_format); +}; +#endif // GOOGLE_CUDA + // Used to keep track of persistent memory buffers used within the op. // It uses malloc and free to avoid the time cost of initializing the memory. template @@ -55,17 +64,6 @@ struct Im2ColBufferResource : public ResourceBase { string DebugString() { return "Im2ColBufferResource"; } }; -#ifdef GOOGLE_CUDA -template -class LaunchConv2DOp { - public: - void launch(OpKernelContext* ctx, bool use_cudnn, bool cudnn_use_autotune, - const Tensor& input, const Tensor& filter, int row_stride, - int col_stride, const Eigen::PaddingType& padding, Tensor* output, - TensorFormat data_format); -}; -#endif // GOOGLE_CUDA - } // namespace tensorflow #endif // TENSORFLOW_KERNELS_CONV_OPS_H diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 00d7f56408..9804d7d38e 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -361,19 +361,15 @@ static void ComputeBackpropInput(const DepthwiseArgs& args, } } -// Kernels to compute the input backprop for depthwise convolution. -template -struct LaunchDepthwiseConvBackpropInputOp; - // Computes the depthwise conv2d backprop input of 'out_backprop' by // 'depthwise_filter' and stores the result in 'in_backprop'. template struct LaunchDepthwiseConvBackpropInputOp { typedef typename Eigen::internal::packet_traits::type Packet; - static void launch(OpKernelContext* ctx, const DepthwiseArgs& args, - const T* out_backprop, const T* depthwise_filter, - T* in_backprop, TensorFormat data_format) { + void operator()(OpKernelContext* ctx, const DepthwiseArgs& args, + const T* out_backprop, const T* depthwise_filter, + T* in_backprop, TensorFormat data_format) { OP_REQUIRES( ctx, data_format == FORMAT_NHWC, errors::Unimplemented( @@ -514,27 +510,8 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -template -struct DepthwiseConv2dBackpropInputGPULaunch { - static void Run(const GPUDevice& d, const DepthwiseArgs args, - const T* out_backprop, const T* filter, T* in_backprop, - TensorFormat data_format); -}; - -template -struct LaunchDepthwiseConvBackpropInputOp { - static void launch(OpKernelContext* ctx, const DepthwiseArgs args, - const T* out_backprop, const T* filter, T* in_backprop, - TensorFormat data_format) { - const GPUDevice& d = ctx->eigen_device(); - DepthwiseConv2dBackpropInputGPULaunch().Run( - d, args, out_backprop, filter, in_backprop, data_format); - auto stream = ctx->op_device_context()->stream(); - OP_REQUIRES(ctx, stream->ok(), errors::Internal("Launch of gpu kernel for " - "DepthwiseConv2dBackpropInp" - "utGPULaunch failed")); - } -}; +extern template struct LaunchDepthwiseConvBackpropInputOp; +extern template struct LaunchDepthwiseConvBackpropInputOp; #endif // GOOGLE_CUDA @@ -598,7 +575,7 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel { if (input_shape.num_elements() == 0) { return; } - LaunchDepthwiseConvBackpropInputOp::launch( + LaunchDepthwiseConvBackpropInputOp()( context, args, out_backprop_ptr, filter_ptr, in_backprop_ptr, data_format_); } @@ -744,9 +721,9 @@ template struct LaunchDepthwiseConvBackpropFilterOp { typedef typename Eigen::internal::packet_traits::type Packet; - static void launch(OpKernelContext* ctx, const DepthwiseArgs& args, - const T* out_backprop, const T* input, T* filter_backprop, - TensorFormat data_format) { + void operator()(OpKernelContext* ctx, const DepthwiseArgs& args, + const T* out_backprop, const T* input, T* filter_backprop, + TensorFormat data_format) { OP_REQUIRES( ctx, data_format == FORMAT_NHWC, errors::Unimplemented( @@ -907,35 +884,8 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -template -struct DepthwiseConv2dBackpropFilterGPULaunch { - static void Run(const GPUDevice& d, const DepthwiseArgs args, - const T* out_backprop, const T* input, T* filter_backprop, - TensorFormat data_format); -}; - -template -struct LaunchDepthwiseConvBackpropFilterOp { - static void launch(OpKernelContext* ctx, const DepthwiseArgs args, - const T* out_backprop, const T* input, T* filter_backprop, - TensorFormat data_format) { - const GPUDevice& d = ctx->eigen_device(); - auto stream = ctx->op_device_context()->stream(); - - // Initialize the results to 0. - int num_filter_backprop = - args.filter_rows * args.filter_cols * args.out_depth; - perftools::gputools::DeviceMemoryBase filter_bp_ptr(filter_backprop, - num_filter_backprop); - stream->ThenMemset32(&filter_bp_ptr, 0, num_filter_backprop * sizeof(T)); - - DepthwiseConv2dBackpropFilterGPULaunch().Run( - d, args, out_backprop, input, filter_backprop, data_format); - OP_REQUIRES(ctx, stream->ok(), errors::Internal("Launch of gpu kernel for " - "DepthwiseConv2dBackpropFil" - "terGPULaunch failed")); - } -}; +extern template struct LaunchDepthwiseConvBackpropFilterOp; +extern template struct LaunchDepthwiseConvBackpropFilterOp; #endif // GOOGLE_CUDA @@ -1001,7 +951,7 @@ class DepthwiseConv2dNativeBackpropFilterOp : public OpKernel { if (filter_shape.num_elements() == 0) { return; } - LaunchDepthwiseConvBackpropFilterOp::launch( + LaunchDepthwiseConvBackpropFilterOp()( context, args, out_backprop_ptr, input_ptr, filter_backprop_ptr, data_format_); } diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index 3c01546d8d..bbeeaf7895 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -54,9 +54,6 @@ namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -template -struct LaunchDepthwiseConvOp; - // Computes the vectorized product of 'input_buffer' and 'filter' and stores // result in 'output' at location specified by 'out_r' and 'out_c'. // @@ -156,9 +153,9 @@ template struct LaunchDepthwiseConvOp { typedef typename Eigen::internal::packet_traits::type Packet; - static void launch(OpKernelContext* ctx, const DepthwiseArgs& args, - const T* input, const T* depthwise_filter, T* output, - TensorFormat data_format) { + void operator()(OpKernelContext* ctx, const DepthwiseArgs& args, + const T* input, const T* depthwise_filter, T* output, + TensorFormat data_format) { OP_REQUIRES( ctx, data_format == FORMAT_NHWC, errors::Unimplemented( @@ -248,27 +245,9 @@ extern template class LaunchConv2DOp; #if GOOGLE_CUDA -template -struct DepthwiseConv2dGPULaunch { - static void Run(const GPUDevice& d, const DepthwiseArgs args, const T* input, - const T* filter, T* output, TensorFormat data_format); -}; - -template -struct LaunchDepthwiseConvOp { - static void launch(OpKernelContext* ctx, const DepthwiseArgs args, - const T* input, const T* filter, T* output, - TensorFormat data_format) { - const GPUDevice& d = ctx->eigen_device(); - DepthwiseConv2dGPULaunch().Run(d, args, input, filter, output, - data_format); - auto stream = ctx->op_device_context()->stream(); - OP_REQUIRES( - ctx, stream->ok(), - errors::Internal( - "Launch of gpu kernel for DepthwiseConv2dGPULaunch failed")); - } -}; +// Extern template instantiated in depthwise_conv_op_gpu.cc. +extern template struct LaunchDepthwiseConvOp; +extern template struct LaunchDepthwiseConvOp; // Extern template instantiated in conv_ops.cc. extern template class LaunchConv2DOp; @@ -393,9 +372,8 @@ class DepthwiseConv2dNativeOp : public BinaryOp { // If in_depth==1, this operation is just a standard convolution, so // invoke that op. if (std::is_same::value && in_depth == 1) { - launcher_.launch(context, use_cudnn_, cudnn_use_autotune_, input, filter, - stride_, stride_, BrainPadding2EigenPadding(padding_), - output, data_format_); + launcher_(context, use_cudnn_, cudnn_use_autotune_, input, filter, + stride_, stride_, padding_, output, data_format_); return; } @@ -417,8 +395,8 @@ class DepthwiseConv2dNativeOp : public BinaryOp { auto input_ptr = input.template flat().data(); auto filter_ptr = filter.template flat().data(); auto output_ptr = output->template flat().data(); - LaunchDepthwiseConvOp::launch( - context, args, input_ptr, filter_ptr, output_ptr, data_format_); + LaunchDepthwiseConvOp()(context, args, input_ptr, filter_ptr, + output_ptr, data_format_); } private: diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h index 1960b02bbe..aa5b5c76f6 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.h +++ b/tensorflow/core/kernels/depthwise_conv_op.h @@ -56,6 +56,53 @@ struct DepthwiseArgs { out_depth(0) {} }; +// Forward declaration. +class OpKernelContext; + +template +struct LaunchDepthwiseConvOp { + void operator()(OpKernelContext* ctx, const DepthwiseArgs& args, + const T* input, const T* filter, T* output, + TensorFormat data_format); +}; + +template +struct LaunchDepthwiseConvBackpropInputOp { + void operator()(OpKernelContext* ctx, const DepthwiseArgs& args, + const T* out_backprop, const T* filter, T* in_backprop, + TensorFormat data_format); +}; + +template +struct LaunchDepthwiseConvBackpropFilterOp { + void operator()(OpKernelContext* ctx, const DepthwiseArgs& args, + const T* out_backprop, const T* input, T* filter_backprop, + TensorFormat data_format); +}; + +#if GOOGLE_CUDA +template +struct LaunchDepthwiseConvOp { + void operator()(OpKernelContext* ctx, const DepthwiseArgs args, + const T* input, const T* filter, T* output, + TensorFormat data_format); +}; + +template +struct LaunchDepthwiseConvBackpropInputOp { + void operator()(class OpKernelContext* ctx, const DepthwiseArgs& args, + const T* out_backprop, const T* filter, T* in_backprop, + TensorFormat data_format); +}; + +template +struct LaunchDepthwiseConvBackpropFilterOp { + void operator()(class OpKernelContext* ctx, const DepthwiseArgs& args, + const T* out_backprop, const T* input, T* filter_backprop, + TensorFormat data_format); +}; +#endif + } // namespace tensorflow namespace tensorflow { diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc index f63a99a730..fcfcd188d2 100644 --- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc @@ -17,6 +17,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/kernels/depthwise_conv_op.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -689,21 +690,27 @@ void LaunchDepthwiseConv2dGPU(const GpuDevice& d, const DepthwiseArgs args, // A simple launch pad to launch the Cuda kernel for depthwise convolution. template -struct DepthwiseConv2dGPULaunch { - static void Run(const GpuDevice& d, const DepthwiseArgs args, const T* input, - const T* filter, T* output, TensorFormat data_format) { - if (args.filter_rows == 3 && args.filter_cols == 3) { - LaunchDepthwiseConv2dGPU(d, args, input, filter, output, +void LaunchDepthwiseConvOp::operator()(OpKernelContext* ctx, + const DepthwiseArgs args, + const T* input, + const T* filter, T* output, + TensorFormat data_format) { + const GPUDevice& d = ctx->eigen_device(); + if (args.filter_rows == 3 && args.filter_cols == 3) { + LaunchDepthwiseConv2dGPU(d, args, input, filter, output, + data_format); + } else { + LaunchDepthwiseConv2dGPU(d, args, input, filter, output, data_format); - } else { - LaunchDepthwiseConv2dGPU(d, args, input, filter, output, - data_format); - } } -}; + auto stream = ctx->op_device_context()->stream(); + OP_REQUIRES(ctx, stream->ok(), + errors::Internal( + "Launch of gpu kernel for DepthwiseConv2dGPULaunch failed")); +} -template struct DepthwiseConv2dGPULaunch; -template struct DepthwiseConv2dGPULaunch; +template struct LaunchDepthwiseConvOp; +template struct LaunchDepthwiseConvOp; // A Cuda kernel to compute the depthwise convolution backprop w.r.t. input. template -struct DepthwiseConv2dBackpropInputGPULaunch { - static void Run(const GpuDevice& d, const DepthwiseArgs args, - const T* out_backprop, const T* filter, T* in_backprop, - TensorFormat data_format) { - if (args.filter_rows == 3 && args.filter_cols == 3) { - LaunchDepthwiseConv2dBackpropInputGPU( - d, args, out_backprop, filter, in_backprop, data_format); - } else { - LaunchDepthwiseConv2dBackpropInputGPU( - d, args, out_backprop, filter, in_backprop, data_format); - } +void LaunchDepthwiseConvBackpropInputOp::operator()( + OpKernelContext* ctx, const DepthwiseArgs& args, const T* out_backprop, + const T* filter, T* in_backprop, TensorFormat data_format) { + const GPUDevice& d = ctx->eigen_device(); + if (args.filter_rows == 3 && args.filter_cols == 3) { + LaunchDepthwiseConv2dBackpropInputGPU( + d, args, out_backprop, filter, in_backprop, data_format); + } else { + LaunchDepthwiseConv2dBackpropInputGPU( + d, args, out_backprop, filter, in_backprop, data_format); } -}; + auto stream = ctx->op_device_context()->stream(); + OP_REQUIRES(ctx, stream->ok(), + errors::Internal("Launch of gpu kernel for " + "DepthwiseConv2dBackpropInp" + "utGPULaunch failed")); +} -template struct DepthwiseConv2dBackpropInputGPULaunch; -template struct DepthwiseConv2dBackpropInputGPULaunch; +template struct LaunchDepthwiseConvBackpropInputOp; +template struct LaunchDepthwiseConvBackpropInputOp; // A Cuda kernel to compute the depthwise convolution backprop w.r.t. filter. template -struct DepthwiseConv2dBackpropFilterGPULaunch { - static void Run(const GpuDevice& d, const DepthwiseArgs args, - const T* out_backprop, const T* input, T* filter_backprop, - TensorFormat data_format) { - if (args.filter_rows == 3 && args.filter_cols == 3) { - LaunchDepthwiseConv2dBackpropFilterGPU( - d, args, out_backprop, input, filter_backprop, data_format); - } else { - LaunchDepthwiseConv2dBackpropFilterGPU( - d, args, out_backprop, input, filter_backprop, data_format); - } +void LaunchDepthwiseConvBackpropFilterOp::operator()( + OpKernelContext* ctx, const DepthwiseArgs& args, const T* out_backprop, + const T* input, T* filter_backprop, TensorFormat data_format) { + const GPUDevice& d = ctx->eigen_device(); + auto stream = ctx->op_device_context()->stream(); + + // Initialize the results to 0. + int num_filter_backprop = + args.filter_rows * args.filter_cols * args.out_depth; + perftools::gputools::DeviceMemoryBase filter_bp_ptr(filter_backprop, + num_filter_backprop); + stream->ThenMemset32(&filter_bp_ptr, 0, num_filter_backprop * sizeof(T)); + + if (args.filter_rows == 3 && args.filter_cols == 3) { + LaunchDepthwiseConv2dBackpropFilterGPU( + d, args, out_backprop, input, filter_backprop, data_format); + } else { + LaunchDepthwiseConv2dBackpropFilterGPU( + d, args, out_backprop, input, filter_backprop, data_format); } -}; + OP_REQUIRES(ctx, stream->ok(), + errors::Internal("Launch of gpu kernel for " + "DepthwiseConv2dBackpropFil" + "terGPULaunch failed")); +} -template struct DepthwiseConv2dBackpropFilterGPULaunch; -template struct DepthwiseConv2dBackpropFilterGPULaunch; +template struct LaunchDepthwiseConvBackpropFilterOp; +template struct LaunchDepthwiseConvBackpropFilterOp; } // namespace tensorflow #endif // GOOGLE_CUDA -- GitLab From 15d233e33095ed5240fd07c63cb3768d5d705fc2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Sep 2017 05:36:53 -0700 Subject: [PATCH 227/294] Fixed compilation with clang. PiperOrigin-RevId: 167489081 --- tensorflow/compiler/xla/tests/BUILD | 1 + tensorflow/compiler/xla/tests/filecheck.cc | 1 + tensorflow/compiler/xla/tests/filecheck.h | 1 + tensorflow/compiler/xla/tools/BUILD | 5 +++++ tensorflow/compiler/xla/tools/replay_computation.cc | 2 +- 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 52b2027aec..9f7ae4ae87 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -215,6 +215,7 @@ cc_library( ], deps = [ "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", "//tensorflow/core:test", diff --git a/tensorflow/compiler/xla/tests/filecheck.cc b/tensorflow/compiler/xla/tests/filecheck.cc index 407b5f4ada..b61544466a 100644 --- a/tensorflow/compiler/xla/tests/filecheck.cc +++ b/tensorflow/compiler/xla/tests/filecheck.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/io/path.h" diff --git a/tensorflow/compiler/xla/tests/filecheck.h b/tensorflow/compiler/xla/tests/filecheck.h index 599bf57ad3..493ff7414b 100644 --- a/tensorflow/compiler/xla/tests/filecheck.h +++ b/tensorflow/compiler/xla/tests/filecheck.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" namespace xla { diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index a946d335ca..da39ba3ffc 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -111,6 +111,11 @@ cc_binary( deps = [ ":replay_computation_library", "//tensorflow/compiler/plugin/executor:plugin_lib", + # TODO: This dependency is a workaround for linking error with clang. + # Without it, linker complains about missing symbols from + # 'xla_device_launch_op'. This dependency should be propagated from + # plugin_lib instead, but no targets other than this break without it. + "//tensorflow/compiler/jit", ], ) diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index bd93e114b7..89b26b8916 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -144,7 +144,7 @@ int RealMain(tensorflow::gtl::ArraySlice args, int main(int argc, char** argv) { // Flags - string fake_infeed_shape; + xla::string fake_infeed_shape; bool use_fake_data = false; const std::vector flag_list = { tensorflow::Flag("use_fake_data", &use_fake_data, -- GitLab From 549dd6fd66a9b176ee3fe5e7093e4a1654bcbdb1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Sep 2017 08:39:02 -0700 Subject: [PATCH 228/294] Add an argument `stop_gradients` to `tf.gradients` in order to hold specific tensors constant wrt `xs`. PiperOrigin-RevId: 167501127 --- tensorflow/python/BUILD | 2 +- tensorflow/python/ops/gradients_impl.py | 54 ++++++++++++++---- tensorflow/python/ops/gradients_test.py | 58 ++++++++++++++++++++ tensorflow/tools/api/golden/tensorflow.pbtxt | 2 +- 4 files changed, 104 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 98dce82ee3..26e0f86c37 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2349,7 +2349,7 @@ cuda_py_test( cuda_py_test( name = "gradients_test", - size = "small", + size = "medium", srcs = ["ops/gradients_test.py"], additional_deps = [ ":array_grad", diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 64987f93dd..cb7d409f3b 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -278,7 +278,7 @@ def _VerifyGeneratedGradients(grads, op): "inputs %d" % (len(grads), op.node_def, len(op.inputs))) -def _StopOps(from_ops, pending_count): +def _StopOps(from_ops, stop_gradient_ops, pending_count): """The set of ops that terminate the gradient computation. This computes the frontier of the forward graph *before* which backprop @@ -288,8 +288,11 @@ def _StopOps(from_ops, pending_count): `_PendingCount(g, xs, from_ops)`. An 'op' has predecessors in `from_ops` iff pending_count[op._id] > 0. + In addition, none of `stop_gradient_ops` will be differentiated. + Args: from_ops: list of Operations. + stop_gradient_ops: list of Operations never to backprop through. pending_count: List of integers, indexed by operation id. Returns: @@ -304,6 +307,7 @@ def _StopOps(from_ops, pending_count): break if is_stop_op: stop_ops.add(op._id) + stop_ops.update(op._id for op in stop_gradient_ops) # pylint: disable=protected-access return stop_ops @@ -374,17 +378,17 @@ def gradients(ys, name="gradients", colocate_gradients_with_ops=False, gate_gradients=False, - aggregation_method=None): - """Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`. + aggregation_method=None, + stop_gradients=None): + """Constructs symbolic derivatives of sum of `ys` w.r.t. x in `xs`. `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys` is a list of `Tensor`, holding the gradients received by the `ys`. The list must be the same length as `ys`. - `gradients()` adds ops to the graph to output the partial - derivatives of `ys` with respect to `xs`. It returns a list of - `Tensor` of length `len(xs)` where each tensor is the `sum(dy/dx)` - for y in `ys`. + `gradients()` adds ops to the graph to output the derivatives of `ys` with + respect to `xs`. It returns a list of `Tensor` of length `len(xs)` where + each tensor is the `sum(dy/dx)` for y in `ys`. `grad_ys` is a list of tensors of the same length as `ys` that holds the initial gradients for each y in `ys`. When `grad_ys` is None, @@ -394,6 +398,31 @@ def gradients(ys, one wanted to weight the gradient differently for each value in each y). + `stop_gradients` is a `Tensor` or a list of tensors to be considered constant + with respect to all `xs`. These tensors will not be backpropagated through, + as though they had been explicitly disconnected using `stop_gradient`. Among + other things, this allows computation of partial derivatives as opposed to + total derivatives. For example: + + a = tf.constant(0.) + b = 2 * a + g = tf.gradients(a + b, [a, b], stop_gradients=[a, b]) + + Here the partial derivatives `g` evaluate to `[1.0, 1.0]`, compared to the + total derivatives `tf.gradients(a + b, [a, b])`, which take into account the + influence of `a` on `b` and evaluate to `[3.0, 1.0]`. Note that the above is + equivalent to: + + a = tf.stop_gradient(tf.constant(0.)) + b = tf.stop_gradient(2 * a) + g = tf.gradients(a + b, [a, b]) + + `stop_gradients` provides a way of stopping gradient after the graph has + already been constructed, as compared to `tf.stop_gradient` which is used + during graph construction. When the two approaches are combined, + backpropagation stops at both `tf.stop_gradient` nodes and nodes in + `stop_gradients`, whichever is encountered first. + Args: ys: A `Tensor` or list of tensors to be differentiated. xs: A `Tensor` or list of tensors to be used for differentiation. @@ -407,6 +436,8 @@ def gradients(ys, for an operations. This avoids some race conditions. aggregation_method: Specifies the method used to combine gradient terms. Accepted values are constants defined in the class `AggregationMethod`. + stop_gradients: Optional. A `Tensor` or list of tensors not to differentiate + through. Returns: A list of `sum(dy/dx)` for each x in `xs`. @@ -423,12 +454,15 @@ def gradients(ys, "functions in tf.contrib.eager.backprop instead.") ys = _AsList(ys) xs = _AsList(xs) + stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients) if grad_ys is None: grad_ys = [None] * len(ys) else: grad_ys = _AsList(grad_ys) - with ops.name_scope(name, "gradients", ys + xs + grad_ys) as grad_scope: + with ops.name_scope( + name, "gradients", + list(ys) + list(xs) + list(stop_gradients) + list(grad_ys)) as grad_scope: ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y") xs = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable) else x @@ -450,6 +484,7 @@ def gradients(ys, ys = [array_ops.identity(y) if y.consumers() else y for y in ys] to_ops = [t.op for t in ys] from_ops = [t.op for t in xs] + stop_gradient_ops = [t.op for t in stop_gradients] pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops, from_ops, colocate_gradients_with_ops) @@ -488,8 +523,7 @@ def gradients(ys, _SetGrad(grads, y, loop_state.ZerosLikeForExit(y)) queue.append(y.op) - # The set of 'from_ops'. - stop_ops = _StopOps(from_ops, pending_count) + stop_ops = _StopOps(from_ops, stop_gradient_ops, pending_count) while queue: # generate gradient subgraph for op. op = queue.popleft() diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index 11c204b5b7..7a561d046a 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -349,6 +349,64 @@ class GradientsTest(test_util.TensorFlowTestCase): g = gradients.gradients([z, z2], x) self.assertAllClose(17502.0, g[0].eval()) + def testPartialDerivatives(self): + with self.test_session(): + x = constant_op.constant(1.) + y = 2 * x + z = x + y + totalg = gradients.gradients(z, [x, y]) + self.assertEqual([3.0, 1.0], [g.eval() for g in totalg]) + partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y]) + self.assertEqual([1.0, 1.0], [g.eval() for g in partialg]) + + def testStopGradients(self): + def _MakeGraph(rng, stop_gradients=()): + def _FunctionOf(xs, k=3): + return ops.convert_to_tensor( + sum(math_ops.matmul(rng.rand(k, k), x) for x in xs) + + rng.rand(k, k)) + + a = _FunctionOf([]) + if "a" in stop_gradients: a = array_ops.stop_gradient(a) + b = _FunctionOf([a]) + if "b" in stop_gradients: b = array_ops.stop_gradient(b) + c = _FunctionOf([a, b]) + if "c" in stop_gradients: c = array_ops.stop_gradient(c) + d = _FunctionOf([b, c]) + if "d" in stop_gradients: d = array_ops.stop_gradient(d) + return dict(a=a, b=b, c=c, d=d) + + def _Gradients(ys, xs, **kwargs): + dydxs = gradients.gradients(ys, xs, **kwargs) + dydxs = [0. * x if dydx is None else dydx + for x, dydx in zip(xs, dydxs)] + return dydxs + + seed = np.random.randint(1000) + cases = [] + subsets = [""] + "a b c d ab ac ad bc bd cd abc abd acd bcd abcd".split() + graph = _MakeGraph(np.random.RandomState(seed)) + for constants in subsets: + graph_with_stops = _MakeGraph(np.random.RandomState(seed), constants) + for variables_ in subsets: + # compute the gradient when stopped using tf.stop_gradients + grad1 = _Gradients([graph_with_stops["d"]], + [graph_with_stops[v] for v in variables_]) + # compute the gradient when stopped using the stop_gradients kwarg + grad2 = _Gradients([graph["d"]], + [graph[v] for v in variables_], + stop_gradients=[graph[v] for v in constants]) + cases.append(dict(grad1=grad1, grad2=grad2, + constants=constants, variables=variables_)) + + # evaluate all tensors in one call to session.run for speed + with self.test_session() as session: + results = session.run([(case["grad1"], case["grad2"]) for case in cases]) + + for (npgrad1, npgrad2), case in zip(results, cases): + for a, b in zip(npgrad1, npgrad2): + np.testing.assert_allclose(a, b) + class FunctionGradientsTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 667ae5cf6e..8a65cb0a7b 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1098,7 +1098,7 @@ tf_module { } member_method { name: "gradients" - argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\'], " + argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\'], " } member_method { name: "greater" -- GitLab From e44eeab6cf6739cc68783a3ed14c6e41503c6ef3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Sep 2017 11:12:07 -0700 Subject: [PATCH 229/294] Bugfix for fill_lower_triangular. PiperOrigin-RevId: 167509581 --- tensorflow/python/ops/distributions/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py index 59add19a58..0c6096a075 100644 --- a/tensorflow/python/ops/distributions/util.py +++ b/tensorflow/python/ops/distributions/util.py @@ -770,7 +770,7 @@ def fill_lower_triangular(x, validate_args=False, name="fill_lower_triangular"): else: d = math_ops.cast(array_ops.shape(x)[-1], dtype=dtypes.float32) # d = n(n+1)/2 implies n is: - n = math_ops.cast(0.5 * (dtypes.sqrt(1. + 8. * d) - 1.), + n = math_ops.cast(0.5 * (math_ops.sqrt(1. + 8. * d) - 1.), dtype=dtypes.int32) if validate_args: is_valid_input_shape = check_ops.assert_equal( -- GitLab From 24ddee8659c3fd7f8d2db02efef7d96de53cdbae Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 4 Sep 2017 14:02:54 -0700 Subject: [PATCH 230/294] Expose component parts of HloInstruction's string representation. PiperOrigin-RevId: 167516835 --- .../compiler/xla/service/hlo_instruction.cc | 70 +++++++++++-------- .../compiler/xla/service/hlo_instruction.h | 8 +++ 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 75b88aeb12..24ef4e09e7 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1662,6 +1662,21 @@ string HloInstruction::ExtendedOpcodeStr() const { string HloInstruction::ToString(bool compact_operands, bool include_metadata) const { + string result = + StrCat(name(), " = ", ShapeUtil::HumanStringWithLayout(shape()), " ", + ExtendedOpcodeStr(), "(", OperandsToString(compact_operands), ")"); + for (const string& extra : ExtraAttributesToString()) { + StrAppend(&result, ", ", extra); + } + if (include_metadata && + (!metadata_.op_type().empty() || !metadata_.op_name().empty() || + !metadata_.source_file().empty())) { + StrAppend(&result, " # metadata=", metadata_.ShortDebugString()); + } + return result; +} + +string HloInstruction::OperandsToString(bool compact) const { string operands; if (opcode() == HloOpcode::kConstant) { // For constants, show the actual value in place of an empty operand list. @@ -1690,12 +1705,12 @@ string HloInstruction::ToString(bool compact_operands, } else { tensorflow::gtl::ArraySlice slice(operands_); const int64 kMaxOperandsToShowIfCompact = 4; - if (compact_operands && slice.size() > kMaxOperandsToShowIfCompact) { + if (compact && slice.size() > kMaxOperandsToShowIfCompact) { slice.remove_suffix(slice.size() - kMaxOperandsToShowIfCompact); } operands = Join(slice, ", ", [&](string* out, HloInstruction* operand) { *out += ShapeUtil::HumanStringWithLayout(operand->shape()); - if (!compact_operands) { + if (!compact) { StrAppend(out, " ", operand->name()); } }); @@ -1704,15 +1719,19 @@ string HloInstruction::ToString(bool compact_operands, StrAppend(&operands, ", ...(+", remaining, ")"); } } - string extra; + return operands; +} + +std::vector HloInstruction::ExtraAttributesToString() const { + std::vector extra; if (CanHaveDimensionsField()) { - StrAppend(&extra, ", dimensions={", Join(dimensions(), ","), "}"); + extra.push_back(StrCat("dimensions={", Join(dimensions(), ","), "}")); } if (window_ != nullptr) { - StrAppend(&extra, ", ", window_util::ToString(*window_)); + extra.push_back(window_util::ToString(*window_)); } if (padding_config_ != nullptr) { - StrAppend(&extra, ", padding=", padding_config_->ShortDebugString()); + extra.push_back(StrCat("padding=", padding_config_->ShortDebugString())); } if (!slice_starts_.empty() && !slice_limits_.empty()) { std::vector bounds; @@ -1721,45 +1740,38 @@ string HloInstruction::ToString(bool compact_operands, bounds.push_back( StrCat("[", slice_starts_[i], ":", slice_limits_[i], "]")); } - StrAppend(&extra, ", slice={", Join(bounds, ", "), "}"); + extra.push_back(StrCat("slice={", Join(bounds, ", "), "}")); } if (convolution_dimension_numbers_ != nullptr) { - StrAppend(&extra, ", ", ConvolutionDimensionNumbersToString()); + extra.push_back(ConvolutionDimensionNumbersToString()); } if (opcode() == HloOpcode::kWhile) { - StrAppend(&extra, ", condition=", while_condition()->name()); - StrAppend(&extra, ", body=", while_body()->name()); + extra.push_back(StrCat("condition=", while_condition()->name())); + extra.push_back(StrCat("body=", while_body()->name())); } else if (opcode() == HloOpcode::kSelectAndScatter) { - StrAppend(&extra, ", select=", select()->name()); - StrAppend(&extra, ", scatter=", scatter()->name()); + extra.push_back(StrCat("select=", select()->name())); + extra.push_back(StrCat("scatter=", scatter()->name())); } else if (!called_computations().empty()) { - StrAppend(&extra, ", calls=", - Join(called_computations(), ", ", - [](string* out, const HloComputation* computation) { - StrAppend(out, computation->name()); - })); + extra.push_back(StrCat( + "calls=", Join(called_computations(), ", ", + [](string* out, const HloComputation* computation) { + StrAppend(out, computation->name()); + }))); } if (opcode() == HloOpcode::kGetTupleElement) { - StrAppend(&extra, ", index=", tuple_index()); + extra.push_back(StrCat("index=", tuple_index())); } if (!control_successors_.empty()) { - StrAppend( - &extra, ", control-successors=", + extra.push_back(StrCat( + "control-successors=", Join(control_successors_, ", ", [](string* out, HloInstruction* succ) { StrAppend(out, succ->name()); - })); + }))); } - if (include_metadata && - (!metadata_.op_type().empty() || !metadata_.op_name().empty() || - !metadata_.source_file().empty())) { - StrAppend(&extra, " # metadata=", metadata_.ShortDebugString()); - } - - return StrCat(name(), " = ", ShapeUtil::HumanStringWithLayout(shape()), " ", - ExtendedOpcodeStr(), "(", operands, ")", extra); + return extra; } string HloInstruction::ToShortString() const { diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index e393e05c34..ca6f27bd40 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -548,6 +548,14 @@ class HloInstruction { string ToString(bool compact_operands = false, bool include_metadata = true) const; + // Components of the ToString() representation: + + // Returns a string representation of the operand list. + string OperandsToString(bool compact) const; + + // Returns string representation of op-specific attributes. + std::vector ExtraAttributesToString() const; + string ToStringNoMetadata() const { return ToString(false, false); } // As ToString, but returns a shorter string. -- GitLab From 155b45698a40a12d4fef4701275ecce07c3bb01a Mon Sep 17 00:00:00 2001 From: Yun Peng Date: Mon, 4 Sep 2017 17:51:52 +0200 Subject: [PATCH 231/294] Revert "Add vmodule support to TF OSS logging." This reverts commit 072b0c9c552837a97c967963628b3c30951388f9. --- tensorflow/core/BUILD | 2 - tensorflow/core/platform/default/logging.cc | 64 +--------- tensorflow/core/platform/default/logging.h | 46 ++----- .../core/platform/vmodule_benchmark_test.cc | 28 ----- tensorflow/core/platform/vmodule_test.cc | 117 ------------------ 5 files changed, 10 insertions(+), 247 deletions(-) delete mode 100644 tensorflow/core/platform/vmodule_benchmark_test.cc delete mode 100644 tensorflow/core/platform/vmodule_test.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 35394eeb87..9db2ed830f 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2156,8 +2156,6 @@ tf_cc_tests( "platform/port_test.cc", "platform/profile_utils/cpu_utils_test.cc", "platform/subprocess_test.cc", - "platform/vmodule_benchmark_test.cc", - "platform/vmodule_test.cc", ], deps = [ ":lib", diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc index ac0988e704..ebdd4b624a 100644 --- a/tensorflow/core/platform/default/logging.cc +++ b/tensorflow/core/platform/default/logging.cc @@ -14,7 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/platform/default/logging.h" -#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/env_time.h" #include "tensorflow/core/platform/macros.h" @@ -25,12 +24,8 @@ limitations under the License. #endif #include -#include #include -#include -#include - namespace tensorflow { namespace internal { @@ -88,11 +83,11 @@ void LogMessage::GenerateLogMessage() { const size_t time_buffer_size = 30; char time_buffer[time_buffer_size]; strftime(time_buffer, time_buffer_size, "%Y-%m-%d %H:%M:%S", - localtime(&now_seconds)); + localtime(&now_seconds)); // TODO(jeff,sanjay): Replace this with something that logs through the env. fprintf(stderr, "%s.%06d: %c %s:%d] %s\n", time_buffer, micros_remainder, - "IWEF"[severity_], fname_, line_, str().c_str()); + "IWEF"[severity_], fname_, line_, str().c_str()); } #endif @@ -129,48 +124,6 @@ int64 MinVLogLevelFromEnv() { return LogLevelStrToInt(tf_env_var_val); } -using VmoduleMap = std::unordered_map; - -// Returns a mapping from module name to VLOG level, derived from the -// TF_CPP_VMOUDLE environment variable; ownership is transferred to the caller. -VmoduleMap* VmoduleRecordsFromEnv() { - // The value of the env var is supposed to be of the form: - // "foo=1,bar=2,baz=3" - const char* tf_env_var_val = getenv("TF_CPP_VMODULE"); - auto* result = new VmoduleMap(); - if (tf_env_var_val == nullptr) return result; - while (true) { - const char* eq = strchr(tf_env_var_val, '='); - if (eq == nullptr) break; - const char* after_eq = eq + 1; - - // Comma either points at the next comma delimiter, or at a null terminator. - // We check that the integer we parse ends at this delimiter. - const char* comma = strchr(after_eq, ','); - const char* new_tf_env_var_val; - if (comma == nullptr) { - comma = strchr(after_eq, '\0'); - new_tf_env_var_val = comma; - } else { - new_tf_env_var_val = comma + 1; - } - - char* endptr = nullptr; - int level = strtol(after_eq, &endptr, 10); - if (endptr != comma) { - fprintf(stderr, - "warning: could not parse integer in vmodule specification in " - "\"%s\".\n", - after_eq); - break; - } - StringPiece module(tf_env_var_val, eq - tf_env_var_val); - tf_env_var_val = new_tf_env_var_val; - (*result)[module] = level; - } - return result; -} - } // namespace LogMessage::~LogMessage() { @@ -184,19 +137,6 @@ int64 LogMessage::MinVLogLevel() { return min_vlog_level; } -bool LogMessage::VmoduleActivated(const char* fname, int lvl) { - static VmoduleMap* vmodule_records = VmoduleRecordsFromEnv(); - const char* last_slash = strrchr(fname, '/'); - const char* module_start = last_slash == nullptr ? fname : last_slash + 1; - const char* dot_after = strchr(module_start, '.'); - const char* module_limit = - dot_after == nullptr ? strchr(fname, '\0') : dot_after; - StringPiece module(module_start, module_limit - module_start); - auto it = vmodule_records->find(module); - if (it == vmodule_records->end()) return false; - return it->second >= lvl; -} - LogMessageFatal::LogMessageFatal(const char* file, int line) : LogMessage(file, line, FATAL) {} LogMessageFatal::~LogMessageFatal() { diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h index c8c9b2da11..04ff9e12b6 100644 --- a/tensorflow/core/platform/default/logging.h +++ b/tensorflow/core/platform/default/logging.h @@ -46,16 +46,6 @@ class LogMessage : public std::basic_ostringstream { // but VLOG(3) will not. Defaults to 0. static int64 MinVLogLevel(); - // Returns whether VLOG level lvl is activated for the file fname. - // - // E.g. if the environment variable TF_CPP_VMODULE contains foo=3 and fname is - // foo.cc and lvl is <= 3, this will return true. - // - // It is expected that the result of this query will be cached in the VLOG-ing - // call site to avoid repeated lookups. This routine performs a hash-map - // access against the VLOG-ing specification provided by the env var. - static bool VmoduleActivated(const char* fname, int lvl); - protected: void GenerateLogMessage(); @@ -86,38 +76,18 @@ class LogMessageFatal : public LogMessage { #define LOG(severity) _TF_LOG_##severity -#if defined(IS_MOBILE_PLATFORM) - +#ifdef IS_MOBILE_PLATFORM // Turn VLOG off when under mobile devices for considerations of binary size. -#define _VLOG_IS_ON(lvl, file) ((lvl) <= 0) - -#elif defined(PLATFORM_WINDOWS) - -// TODO(b/64279502) The _VLOG_IS_ON definition below appears to cause MSVC to -// fatal error, so we fall back to the vmodule-less implementation for now. -#define _VLOG_IS_ON(lvl, file) \ - ((lvl) <= ::tensorflow::internal::LogMessage::MinVLogLevel()) - +#define VLOG_IS_ON(lvl) ((lvl) <= 0) #else - -// Otherwise, set TF_CPP_MIN_VLOG_LEVEL environment to update minimum log level -// of VLOG, or TF_CPP_VMODULE to set the minimum log level for individual -// translation units. -#define _VLOG_IS_ON(lvl, file) \ - (([](int level, const char* fname) { \ - if (level <= ::tensorflow::internal::LogMessage::MinVLogLevel()) \ - return true; \ - static bool vmodule_activated = \ - ::tensorflow::internal::LogMessage::VmoduleActivated(fname, level); \ - return vmodule_activated; \ - })(lvl, file)) - +// Otherwise, Set TF_CPP_MIN_VLOG_LEVEL environment to update minimum log level +// of VLOG +#define VLOG_IS_ON(lvl) \ + ((lvl) <= ::tensorflow::internal::LogMessage::MinVLogLevel()) #endif -#define VLOG_IS_ON(lvl) _VLOG_IS_ON(lvl, __FILE__) - -#define VLOG(lvl) \ - if (TF_PREDICT_FALSE(_VLOG_IS_ON(lvl, __FILE__))) \ +#define VLOG(lvl) \ + if (TF_PREDICT_FALSE(VLOG_IS_ON(lvl))) \ ::tensorflow::internal::LogMessage(__FILE__, __LINE__, tensorflow::INFO) // CHECK dies with a fatal error if condition is not true. It is *not* diff --git a/tensorflow/core/platform/vmodule_benchmark_test.cc b/tensorflow/core/platform/vmodule_benchmark_test.cc deleted file mode 100644 index 0f9e75bf9c..0000000000 --- a/tensorflow/core/platform/vmodule_benchmark_test.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/test_benchmark.h" - -namespace tensorflow { - -static void BM_DisabledVlog(int iters) { - for (int i = 0; i < iters; ++i) { - VLOG(1) << "Testing VLOG(1)!"; - } -} -BENCHMARK(BM_DisabledVlog); - -} // namespace tensorflow diff --git a/tensorflow/core/platform/vmodule_test.cc b/tensorflow/core/platform/vmodule_test.cc deleted file mode 100644 index 47b4b2e0e7..0000000000 --- a/tensorflow/core/platform/vmodule_test.cc +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Test that popens a child process with the VLOG-ing environment variable set -// for the logging framework, and observes VLOG_IS_ON and VLOG macro output. - -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/platform.h" -#include "tensorflow/core/platform/test.h" - -#include - -namespace tensorflow { -namespace { - -int RealMain(const char* argv0, bool do_vlog) { - if (do_vlog) { -#if !defined(PLATFORM_GOOGLE) - // Note, we only test this when !defined(PLATFORM_GOOGLE) because - // VmoduleActivated doesn't exist in that implementation. - // - // Also, we call this internal API to simulate what would happen if - // differently-named translation units attempted to VLOG, so we don't need - // to create dummy translation unit files. - bool ok = internal::LogMessage::VmoduleActivated("vmodule_test.cc", 7) && - internal::LogMessage::VmoduleActivated("shoobadooba.h", 3); - if (!ok) { - fprintf(stderr, "vmodule activated levels not as expected.\n"); - return EXIT_FAILURE; - } -#endif - - // Print info on which VLOG levels are activated. - fprintf(stderr, "VLOG_IS_ON(8)? %d\n", VLOG_IS_ON(8)); - fprintf(stderr, "VLOG_IS_ON(7)? %d\n", VLOG_IS_ON(7)); - fprintf(stderr, "VLOG_IS_ON(6)? %d\n", VLOG_IS_ON(6)); - // Do some VLOG-ing. - VLOG(8) << "VLOG(8)"; - VLOG(7) << "VLOG(7)"; - VLOG(6) << "VLOG(6)"; - LOG(INFO) << "INFO"; - return EXIT_SUCCESS; - } - - // Popen the child process. - std::string command = std::string(argv0); -#if defined(PLATFORM_GOOGLE) - command = command + " do_vlog --vmodule=vmodule_test=7 --alsologtostderr"; -#else - command = - "TF_CPP_VMODULE=vmodule_test=7,shoobadooba=3 " + command + " do_vlog"; -#endif - command += " 2>&1"; - fprintf(stderr, "Running: \"%s\"\n", command.c_str()); - FILE* f = popen(command.c_str(), "r"); - if (f == nullptr) { - fprintf(stderr, "Failed to popen child: %s\n", strerror(errno)); - return EXIT_FAILURE; - } - - // Read data from the child's stdout. - constexpr int kBufferSizeBytes = 4096; - char buffer[kBufferSizeBytes]; - size_t result = fread(buffer, sizeof(buffer[0]), kBufferSizeBytes - 1, f); - if (result == 0) { - fprintf(stderr, "Failed to read from child stdout: %zu %s\n", result, - strerror(errno)); - return EXIT_FAILURE; - } - buffer[result] = '\0'; - int status = pclose(f); - if (status == -1) { - fprintf(stderr, "Failed to close popen child: %s\n", strerror(errno)); - return EXIT_FAILURE; - } - - // Check output is as expected. - const char kExpected[] = - "VLOG_IS_ON(8)? 0\nVLOG_IS_ON(7)? 1\nVLOG_IS_ON(6)? 1\n"; - if (strstr(buffer, kExpected) == nullptr) { - fprintf(stderr, "error: unexpected output from child: \"%.*s\"\n", - kBufferSizeBytes, buffer); - return EXIT_FAILURE; - } - bool ok = strstr(buffer, "VLOG(7)\n") != nullptr && - strstr(buffer, "VLOG(6)\n") != nullptr && - strstr(buffer, "VLOG(8)\n") == nullptr; - if (!ok) { - fprintf(stderr, "error: VLOG output not as expected: \"%.*s\"\n", - kBufferSizeBytes, buffer); - return EXIT_FAILURE; - } - - // Success! - return EXIT_SUCCESS; -} - -} // namespace -} // namespace tensorflow - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - bool do_vlog = argc >= 2 && strcmp(argv[1], "do_vlog") == 0; - return tensorflow::RealMain(argv[0], do_vlog); -} -- GitLab From f6d7e459f9780913b7b0b7be12e856e0999ef99b Mon Sep 17 00:00:00 2001 From: resec Date: Tue, 5 Sep 2017 14:20:21 +0800 Subject: [PATCH 232/294] fix compile_nsync.sh compile_nsync.sh is not workable with android x86 arch, here is the fix by replacing arm specific arch option. --- tensorflow/contrib/makefile/compile_nsync.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index 207661ee46..729ab6b037 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -215,12 +215,12 @@ for arch in $archs; do armeabi-v7a) toolchain="arm-linux-androideabi-4.9" sysroot_arch="arm" bin_prefix="arm-linux-androideabi" - march_option="-march=armv7-a" + march_option="-march=armv7-a -mfloat-abi=softfp -mfpu=neon" ;; armeabi-v7a-hard) toolchain="arm-linux-androideabi-4.9" sysroot_arch="arm" bin_prefix="arm-linux-androideabi" - march_option="-march=armv7-a" + march_option="-march=armv7-a -mfpu=neon" ;; mips) toolchain="mipsel-linux-android-4.9" sysroot_arch="mips" @@ -266,8 +266,7 @@ for arch in $archs; do -I$(NDK_ROOT)/sources/cxx-stl/gnu-libstdc++/4.9/libs/'"$arch"'/include \ -I../../platform/c++11 -I../../platform/gcc \ -I../../platform/posix -pthread - PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' \ - -mfloat-abi=softfp -mfpu=neon -fPIE + PLATFORM_CFLAGS=-std=c++11 -Wno-narrowing '"$march_option"' -fPIE PLATFORM_LDFLAGS=-pthread MKDEP=${CC} -M -std=c++11 PLATFORM_C=../../platform/c++11/src/nsync_semaphore_mutex.cc \ -- GitLab From 043f89e5fcc7758bfa134aacbae89055cf5ebdf1 Mon Sep 17 00:00:00 2001 From: ben Date: Tue, 5 Sep 2017 19:45:21 +0900 Subject: [PATCH 233/294] FIx typo & clarify argument type in comment --- tensorflow/python/profiler/model_analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py index 5345949664..a1fe47982f 100644 --- a/tensorflow/python/profiler/model_analyzer.py +++ b/tensorflow/python/profiler/model_analyzer.py @@ -117,7 +117,7 @@ class Profiler(object): ```python Typical use case: # Currently we are only allowed to create 1 profiler per process. - profiler = Profile(sess.graph) + profiler = Profiler(sess.graph) for i in xrange(total_steps): if i % 10000 == 0: @@ -174,7 +174,7 @@ class Profiler(object): """Add statistics of a step. Args: - step: A step uint64 used to identify the RunMetadata. Must be different + step: int, A step used to identify the RunMetadata. Must be different across different AddStep() calls. run_meta: RunMetadata proto that contains statistics of a session run. """ -- GitLab From 07050d5173b1a12b9a491e20f51ff8c72199b2c0 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Tue, 5 Sep 2017 09:38:19 -0400 Subject: [PATCH 234/294] Install name fix for the libtensorflow.so file. --- tensorflow/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 5b6a18b6a6..92e2989967 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -464,6 +464,7 @@ cc_binary( "//tensorflow:darwin": [ "-Wl,-exported_symbols_list", # This line must be directly followed by the exported_symbols.lds file "//tensorflow/c:exported_symbols.lds", + "-Wl,-install_name,@rpath/libtensorflow.so", ], "//tensorflow:windows": [], "//tensorflow:windows_msvc": [], -- GitLab From 625caf829f92e172c29c6b0e07f57665aa4f0068 Mon Sep 17 00:00:00 2001 From: mdfaijul Date: Tue, 29 Aug 2017 13:08:14 -0700 Subject: [PATCH 235/294] MklReshapeOp fix for TensorShape int64 support --- tensorflow/core/kernels/mkl_reshape_op.cc | 67 +++++++++++++++++------ 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc index b3763f17bc..03c3fb09a1 100644 --- a/tensorflow/core/kernels/mkl_reshape_op.cc +++ b/tensorflow/core/kernels/mkl_reshape_op.cc @@ -43,30 +43,26 @@ class MklReshapeOp : public OpKernel { OP_REQUIRES(context, IsLegacyVector(sizes.shape()), errors::InvalidArgument("sizes input must be 1-D, not shape ", sizes.shape().DebugString())); - const int64 num_dims = sizes.NumElements(); // Compute the output shape. Determine product of specified // dimensions, and find the index of the unspecified one. TensorShape shape; int64 product = 1; int unknown_index = -1; - auto vec_size = sizes.flat(); - for (int d = 0; d < num_dims; ++d) { - const int32 size = vec_size(d); - if (size == -1) { - OP_REQUIRES( - context, unknown_index == -1, - errors::InvalidArgument("only one input size may be -1, not both ", - unknown_index, " and ", d)); - unknown_index = d; - shape.AddDim(1); - } else { - OP_REQUIRES(context, size >= 0, - errors::InvalidArgument( - "size ", d, " must be non-negative, not ", size)); - shape.AddDim(size); - product *= size; - } + switch (sizes.dtype()) { + case DT_INT32: + OP_REQUIRES_OK(context, ValidateSizes(sizes, &product, + &unknown_index, &shape)); + break; + case DT_INT64: + OP_REQUIRES_OK(context, ValidateSizes(sizes, &product, + &unknown_index, &shape)); + break; + default: + context->CtxFailure(errors::InvalidArgument( + "desired shape must be a DT_INT32 or DT_INT64 vector, not a ", + DataTypeString(sizes.dtype()))); + return; } if (unknown_index != -1) { OP_REQUIRES( @@ -132,6 +128,34 @@ class MklReshapeOp : public OpKernel { CopyTfTensorInToOutWithShape(context, 0, 0, shape); } } + private: + template + Status ValidateSizes(const Tensor& sizes, int64* product, int* unknown_index, + TensorShape* shape) { + *product = 1; + *unknown_index = -1; + const int64 num_dims = sizes.NumElements(); + auto Svec = sizes.flat(); + for (int d = 0; d < num_dims; ++d) { + const Tshape size = Svec(d); + if (size == -1) { + if (*unknown_index != -1) { + return errors::InvalidArgument( + "Only one input size may be -1, not both ", *unknown_index, + " and ", d); + } + *unknown_index = d; + shape->AddDim(1); + } else if (size < 0) { + return errors::InvalidArgument("Size ", d, + " must be non-negative, not ", size); + } else { + shape->AddDim(size); + (*product) *= size; + } + } + return Status::OK(); + } }; #define REGISTER_MKL_CPU(T) \ @@ -141,6 +165,13 @@ class MklReshapeOp : public OpKernel { .TypeConstraint("T") \ .TypeConstraint("Tshape") \ .Label(mkl_op_registry::kMklOpLabel), \ + MklReshapeOp); \ + REGISTER_KERNEL_BUILDER(Name("_MklReshape") \ + .Device(DEVICE_CPU) \ + .HostMemory("shape") \ + .TypeConstraint("T") \ + .TypeConstraint("Tshape") \ + .Label(mkl_op_registry::kMklOpLabel), \ MklReshapeOp); TF_CALL_float(REGISTER_MKL_CPU); #undef REGISTER_MKL_CPU -- GitLab From 33dc48347c4a4872ea51d228c1b0334920e04850 Mon Sep 17 00:00:00 2001 From: Akimasa KIMURA Date: Tue, 29 Aug 2017 22:23:21 +0900 Subject: [PATCH 236/294] Update welcome.md Add TensorFlow User Group Utsunomiya into TensorFlow Communities Around the World. --- tensorflow/docs_src/community/welcome.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md index 194649a304..4991783a53 100644 --- a/tensorflow/docs_src/community/welcome.md +++ b/tensorflow/docs_src/community/welcome.md @@ -37,6 +37,7 @@ Asia: * [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) _(Korean language)_ * [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) _(Japanese Language)_ * [Soleil Data Dojo](https://soleildatadojo.connpass.com/) _(Japanese language)_ +* [TensorFlow User Group Utsunomiya](https://tfug-utsunomiya.connpass.com/) Europe: -- GitLab From f96b25cceeb5a97d87ae2f7b0cc22186a7495d66 Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Tue, 18 Jul 2017 15:14:34 -0700 Subject: [PATCH 237/294] Adding support for connecting filter output from Conv2D to Conv2DBackpropInput --- tensorflow/core/graph/mkl_layout_pass.cc | 62 ++++++++++++++++++++++-- tensorflow/core/ops/nn_ops.cc | 4 ++ 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 2f9ceaa3bd..ad19cf6faa 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -1099,6 +1099,46 @@ int MklLayoutRewritePass::SetUpContiguousInputs( CHECK_NOTNULL(workspace_tensors); CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + // Temporary solution to connect filter input of BackpropInput with the + // converted filter from Conv2D. + bool do_connect_conv2d_backprop_input_filter = false; + Node* conv2d_node = nullptr; + // Filter node is 2nd input (slot index 1) of Conv2D. + int kConv2DFilterInputSlotIdx = 1; + int kConv2DBackpropInputFilterInputSlotIdx = 1; + int kConv2DFilterOutputSlotIdx = 1; + if (old_node->type_string() == csinfo_.conv2d_grad_input) { + // We need to find Conv2D node from Conv2DBackpropInput. + // For that let's first find filter node that is 2nd input (slot 1) + // of BackpropInput. + Node* filter_node = nullptr; + old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node); + CHECK_NOTNULL(filter_node); + + // Now check which nodes receive from filter_node. Filter feeds as + // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias. + for (const Edge* e : filter_node->out_edges()) { + if (e->dst()->type_string() == csinfo_.mkl_conv2d && + e->dst_input() == kConv2DFilterInputSlotIdx + /* filter is 2nd input of Conv2D and _MklConv2D. */) { + if (conv2d_node != nullptr) { + VLOG(1) << "MklLayoutRewritePass: unusual case of same filter" + << " feeding multiple Conv2D nodes: " + << filter_node->DebugString(); + // We will not connect filter input of Conv2DBackpropInput + // to be safe here. + do_connect_conv2d_backprop_input_filter = false; + } else { + conv2d_node = e->dst(); + } + } + } + + if (conv2d_node != nullptr) { + do_connect_conv2d_backprop_input_filter = true; + } + } + // Number of input slots to original op // Input slots are represented by .Input() calls in REGISTER_OP. int old_node_input_slots = old_node->op_def().input_arg_size(); @@ -1122,7 +1162,13 @@ int MklLayoutRewritePass::SetUpContiguousInputs( nb->Input(new_node_inputs); nn_slot_idx++; } else { - nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second); + // Special case for connecting filter input of Conv2DBackpropInput + if (do_connect_conv2d_backprop_input_filter && + iidx == kConv2DBackpropInputFilterInputSlotIdx) { + nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx); + } else { + nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second); + } iidx++; nn_slot_idx++; } @@ -1157,9 +1203,17 @@ int MklLayoutRewritePass::SetUpContiguousInputs( } else { Node* mkl_node = nullptr; int mkl_node_output_slot = 0; - GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first, - old_node_inputs[iidx].second, - &mkl_node, &mkl_node_output_slot); + // Special case for connecting filter input of Conv2DBackpropInput + if (do_connect_conv2d_backprop_input_filter && + iidx == kConv2DBackpropInputFilterInputSlotIdx) { + GetNodeProducingMklTensor(g, old_node, conv2d_node, + kConv2DFilterOutputSlotIdx, + &mkl_node, &mkl_node_output_slot); + } else { + GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first, + old_node_inputs[iidx].second, + &mkl_node, &mkl_node_output_slot); + } nb->Input(mkl_node, mkl_node_output_slot); iidx++; nn_slot_idx++; diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 1ab1f1a736..8a2d5e8c05 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -2791,7 +2791,9 @@ REGISTER_OP("_MklConv2D") .Input("mkl_input: uint8") .Input("mkl_filter: uint8") .Output("output: T") + .Output("filter_output: T") .Output("mkl_output: uint8") + .Output("mkl_filter_output: uint8") .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr("use_cudnn_on_gpu: bool = true") @@ -2813,7 +2815,9 @@ REGISTER_OP("_MklConv2DWithBias") .Input("mkl_filter: uint8") .Input("mkl_bias: uint8") .Output("output: T") + .Output("filter_output: T") .Output("mkl_output: uint8") + .Output("mkl_filter_output: uint8") .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr("use_cudnn_on_gpu: bool = true") -- GitLab From 708b14254ad92389f5c778791e593c735cd569b1 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Thu, 20 Jul 2017 11:24:10 -0700 Subject: [PATCH 238/294] Changed conv fwd to emit filter as extra output, and added some code to replace AddN op by Add in the graph --- tensorflow/core/graph/mkl_layout_pass.cc | 38 ++++++++++++++++++++++++ tensorflow/core/kernels/mkl_conv_ops.cc | 25 ++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index ad19cf6faa..2fdc4014c0 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -256,6 +256,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { public: MklLayoutRewritePass() { // NOTE: names are alphabetically sorted. + csinfo_.add = "Add"; + csinfo_.add_n = "AddN"; csinfo_.avg_pool = "AvgPool"; csinfo_.avg_pool_grad = "AvgPoolGrad"; csinfo_.bias_add = "BiasAdd"; @@ -347,6 +349,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.reshape, GetMklOpName(csinfo_.reshape), CopyAttrsReshape, AlwaysRewrite, nullptr}); + // Rewrite AddN if N = 2 into Add. + rinfo_.push_back({csinfo_.add_n, csinfo_.add, + CopyAttrsAddN, BinaryAddRewrite, nullptr}); + // Add info about which ops to add workspace edge to and the slots. wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3}); @@ -429,6 +435,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { /// Structure to store all constant strings /// NOTE: names are alphabetically sorted. typedef struct { + string add; + string add_n; string avg_pool; string avg_pool_grad; string bias_add; @@ -575,6 +583,25 @@ class MklLayoutRewritePass : public GraphOptimizationPass { static bool AlwaysRewrite(const Node* n, const ContextInfo* c = nullptr) { return true; } + + static bool BinaryAddRewrite(const Node* n, const ContextInfo* c = nullptr) { + CHECK_NOTNULL(n); + + int N = 0; + DataType T; + + CHECK_EQ(GetNodeAttr(n->def(), "N", &N).ok(), true); + CHECK_EQ(GetNodeAttr(n->def(), "T", &T).ok(), true); + + // If Add operation supports type T and AddN has only 2 input tensors, + // then we replace AddN by Add. + if (mkl_op_registry::IsMklOp(csinfo_.add, T) && N == 2) { + VLOG(1) << "MklLayoutRewritePass: Replacing AddN with Add"; + return true; + } + return false; + } + // Check if we are performing pooling on depth or batch. If it is, then we // do not rewrite MaxPool node to Mkl version. @@ -907,6 +934,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // We need operator-specific function to copy attributes because the framework // does not provide any generic function for it. // NOTE: names are alphabetically sorted. + static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb); @@ -1433,6 +1461,16 @@ void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded( // Op-specific functions to copy attributes from old node to new node ////////////////////////////////////////////////////////////////////////// +void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node, + NodeBuilder* nb) { + DataType T; + + // Get all attributes from old node. + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); + // Add attributes to new node. + nb->Attr("T", T); +} + void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb) { DataType T; diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 203e694631..18df17ef2d 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -265,6 +265,27 @@ class MklConv2DOp : public OpKernel { sizeof(T)); AllocateOutputSetMklShape(context, 0, &output, mkl_output_tf_shape, mkl_output_mkl_shape); + + // Filter output to be used in the backprob_input + TensorShape mkl_filter_output_tf_shape; + MklShape mkl_filter_output_mkl_shape; + mkl_filter_output_mkl_shape.SetMklTensor(true); + mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd, dnnResourceFilter); + size_t tf_filter_sizes [4] = {filter.dim_size(3), + filter.dim_size(2), filter.dim_size(1), filter.dim_size(0)}; + size_t tf_filter_strides[4] = {mkl_context.filter_strides[1], mkl_context.filter_strides[0], + mkl_context.filter_strides[2],mkl_context.filter_strides[3]}; + mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), tf_filter_sizes, + tf_filter_strides); + mkl_filter_output_mkl_shape.SetTfDimOrder(mkl_context.filter_dims, data_format_); + mkl_filter_output_tf_shape.AddDim( + dnnLayoutGetMemorySize_F32( + static_cast(mkl_filter_output_mkl_shape.GetMklLayout())) / + sizeof(T)); + AllocateOutputSetMklShape(context, 1, &mkl_context.output_filter, mkl_filter_output_tf_shape, + mkl_filter_output_mkl_shape); + + mkl_context.conv_res[dnnResourceDst] = static_cast(output->flat().data()); @@ -303,6 +324,7 @@ class MklConv2DOp : public OpKernel { dnnPrimitive_t prim_fwd; void* conv_res[dnnResourceNumber]; dnnLayout_t lt_filter, lt_bias, lt_input; + Tensor* output_filter = nullptr; // Create MKL dnnLayout_t objects for tensors coming into the layer void MklCreateInputLayouts(OpKernelContext* context) { @@ -383,8 +405,7 @@ class MklConv2DOp : public OpKernel { CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_filter, lt_filter, mkl_lt_internal_filter), E_SUCCESS); - AllocTmpBuffer(context, mkl_tmp_filter_buf_tensor, - mkl_lt_internal_filter, &mkl_buf_convert_filter); + mkl_buf_convert_filter = const_cast(static_cast(output_filter->flat().data())); CHECK_EQ( dnnConversionExecute_F32(mkl_prim_convert_filter, mkl_buf_filter, mkl_buf_convert_filter), -- GitLab From 6bde6c3584ad85f532d321f184a80e4f0bdae1da Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Mon, 24 Jul 2017 16:47:29 -0700 Subject: [PATCH 239/294] Removed AddN changes --- tensorflow/core/graph/mkl_layout_pass.cc | 35 ------------------------ 1 file changed, 35 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 2fdc4014c0..fbc676edcf 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -257,7 +257,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { MklLayoutRewritePass() { // NOTE: names are alphabetically sorted. csinfo_.add = "Add"; - csinfo_.add_n = "AddN"; csinfo_.avg_pool = "AvgPool"; csinfo_.avg_pool_grad = "AvgPoolGrad"; csinfo_.bias_add = "BiasAdd"; @@ -349,10 +348,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.reshape, GetMklOpName(csinfo_.reshape), CopyAttrsReshape, AlwaysRewrite, nullptr}); - // Rewrite AddN if N = 2 into Add. - rinfo_.push_back({csinfo_.add_n, csinfo_.add, - CopyAttrsAddN, BinaryAddRewrite, nullptr}); - // Add info about which ops to add workspace edge to and the slots. wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3}); @@ -436,7 +431,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { /// NOTE: names are alphabetically sorted. typedef struct { string add; - string add_n; string avg_pool; string avg_pool_grad; string bias_add; @@ -584,24 +578,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return true; } - static bool BinaryAddRewrite(const Node* n, const ContextInfo* c = nullptr) { - CHECK_NOTNULL(n); - - int N = 0; - DataType T; - - CHECK_EQ(GetNodeAttr(n->def(), "N", &N).ok(), true); - CHECK_EQ(GetNodeAttr(n->def(), "T", &T).ok(), true); - - // If Add operation supports type T and AddN has only 2 input tensors, - // then we replace AddN by Add. - if (mkl_op_registry::IsMklOp(csinfo_.add, T) && N == 2) { - VLOG(1) << "MklLayoutRewritePass: Replacing AddN with Add"; - return true; - } - return false; - } - // Check if we are performing pooling on depth or batch. If it is, then we // do not rewrite MaxPool node to Mkl version. @@ -934,7 +910,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // We need operator-specific function to copy attributes because the framework // does not provide any generic function for it. // NOTE: names are alphabetically sorted. - static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb); @@ -1461,16 +1436,6 @@ void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded( // Op-specific functions to copy attributes from old node to new node ////////////////////////////////////////////////////////////////////////// -void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node, - NodeBuilder* nb) { - DataType T; - - // Get all attributes from old node. - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); - // Add attributes to new node. - nb->Attr("T", T); -} - void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, NodeBuilder* nb) { DataType T; -- GitLab From ca0743d4a4d8b24100a6a14a858d4b859c4e35c0 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Thu, 27 Jul 2017 14:06:03 -0700 Subject: [PATCH 240/294] Fixed a crash in conv-grad-input with NHWC format --- tensorflow/core/kernels/mkl_conv_grad_input_ops.cc | 11 +++++++---- tensorflow/core/kernels/mkl_conv_ops.cc | 10 ++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index ef7338e0e0..da3ac710d4 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -96,13 +96,16 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { OP_REQUIRES(context, mkl_context.filter_shape.GetDimension() == 4, errors::InvalidArgument( "Conv2DCustomBackpropInput: size must be 4-dim")); - - MklSizesToTFSizes(context, data_format, mkl_context.filter_shape, - &filter_shape); + + const int64* filter_sizes = (const int64*) mkl_context.filter_shape.GetSizes(); + const int64 filter_dims = mkl_context.filter_shape.GetDimension(); + + OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(filter_sizes, filter_dims, &filter_shape)); + } else { filter_shape = filter.shape(); } - + // Generate shape for outback prop if input is in MKL format. if (outback_in_mkl_format) { OP_REQUIRES(context, mkl_context.outback_shape.GetDimension() == 4, diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 18df17ef2d..3d20d3a687 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -271,12 +271,10 @@ class MklConv2DOp : public OpKernel { MklShape mkl_filter_output_mkl_shape; mkl_filter_output_mkl_shape.SetMklTensor(true); mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd, dnnResourceFilter); - size_t tf_filter_sizes [4] = {filter.dim_size(3), - filter.dim_size(2), filter.dim_size(1), filter.dim_size(0)}; - size_t tf_filter_strides[4] = {mkl_context.filter_strides[1], mkl_context.filter_strides[0], - mkl_context.filter_strides[2],mkl_context.filter_strides[3]}; - mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), tf_filter_sizes, - tf_filter_strides); + + mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), mkl_context.filter_sizes, + mkl_context.filter_strides); + mkl_filter_output_mkl_shape.SetTfDimOrder(mkl_context.filter_dims, data_format_); mkl_filter_output_tf_shape.AddDim( dnnLayoutGetMemorySize_F32( -- GitLab From d8252035169ca6e0290a43ad6e557e099798aac7 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Mon, 31 Jul 2017 18:09:56 -0700 Subject: [PATCH 241/294] Fixed the filter sizes order in conv fwd --- tensorflow/core/kernels/mkl_conv_grad_input_ops.cc | 6 +++--- tensorflow/core/kernels/mkl_conv_ops.cc | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index da3ac710d4..1024694be5 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -97,10 +97,10 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { errors::InvalidArgument( "Conv2DCustomBackpropInput: size must be 4-dim")); - const int64* filter_sizes = (const int64*) mkl_context.filter_shape.GetSizes(); - const int64 filter_dims = mkl_context.filter_shape.GetDimension(); + const int64* filter_sizes = (const int64*) mkl_context.filter_shape.GetSizes(); + const int64 filter_dims = mkl_context.filter_shape.GetDimension(); - OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(filter_sizes, filter_dims, &filter_shape)); + OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(filter_sizes, filter_dims, &filter_shape)); } else { filter_shape = filter.shape(); diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 3d20d3a687..ee70544cee 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -272,7 +272,8 @@ class MklConv2DOp : public OpKernel { mkl_filter_output_mkl_shape.SetMklTensor(true); mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd, dnnResourceFilter); - mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), mkl_context.filter_sizes, + size_t filter_sizes [4] = {filter.dim_size(0), filter.dim_size(1), filter.dim_size(2), filter.dim_size(3)}; + mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes, mkl_context.filter_strides); mkl_filter_output_mkl_shape.SetTfDimOrder(mkl_context.filter_dims, data_format_); -- GitLab From 5076e1e312a96711b680696ca66469c8bd49307b Mon Sep 17 00:00:00 2001 From: "niranjan.hasabnis" Date: Tue, 1 Aug 2017 10:51:46 -0700 Subject: [PATCH 242/294] Updating graph unit tests based on filter propagation optimization --- tensorflow/core/graph/mkl_layout_pass_test.cc | 12 ++++++------ tensorflow/core/graph/mkl_tfconversion_pass_test.cc | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 482e339802..bd1d74368e 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -788,7 +788,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) { "DMT/_1(Const);DMT/_2(Const);E(Mul)|A->C;A->D;" "A:control->DMT/_0:control;A:control->DMT/_1:control;" "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;" - "C:1->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2"); + "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2"); } // Conv2D with INT32 which is not supported by Mkl @@ -917,7 +917,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) { "A:control->DMT/_2:control;A:control->DMT/_3:control;" "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;" "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;" - "DMT/_4->H:3;E->H:1;E:1->H:4;F->H:2;F:1->H:5;G->H;" + "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;" "G:control->DMT/_4:control;H->I:1"); } @@ -953,7 +953,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) { "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Mul);G(Const);" "H(_MklConcat);I(Mul)|A->E;A->I;A:control->DMT/_0:control;" "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" - "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:1->H:4;F->H:2;" + "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;" "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1"); } @@ -1023,8 +1023,8 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) { "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;" "C:control->DMT/_0:control;C:control->DMT/_1:control;" "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;" - "DMT/_4->H:5;E->H;E:1->H:3;E:control->DMT/_4:control;F->H:1;" - "F:1->H:4;G->H:2;H->I:1"); + "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;" + "F:2->H:4;G->H:2;H->I:1"); } // ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it @@ -1060,7 +1060,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) { "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Mul);G(Const);" "H(_MklConcatV2);I(Mul)|A->E;A->I;A:control->DMT/_0:control;" "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" - "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:1->H:3;" + "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;" "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;" "G->H:2;H->I:1"); } diff --git a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc index 90bef11164..b01818f746 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc @@ -173,13 +173,13 @@ TEST_F(MklToTfConversionPass, Positive) { EXPECT_EQ(DoRunMklToTfConversionPass(), "A(Input);B(Input);C(_MklConv2D);D(Input);E(Sub);M(_MklInput);" "Mkl2Tf/_0(_MklToTf);N(_MklInput)|A->C;B->C:1;C->Mkl2Tf/_0;" - "C:1->Mkl2Tf/_0:1;D->E:1;M->C:2;Mkl2Tf/_0->E;N->C:3"); + "C:2->Mkl2Tf/_0:1;D->E:1;M->C:2;Mkl2Tf/_0->E;N->C:3"); } } // MklConv2D followed by MklToTf op followed by Non-Mkl layer. // C=MklConv2D(A,M,B,N); D=MklToTf(C:0, C:1) F=Sub(D,E) (for interleaved) -// C=MklConv2D(A,B,M,N); D=MklToTf(C:0, C:1) F=Sub(D,E) (for contiguous) +// C=MklConv2D(A,B,M,N); D=MklToTf(C:0, C:2) F=Sub(D,E) (for contiguous) // MklToTf node should not be inserted again. TEST_F(MklToTfConversionPass, Negative_DoubleInsert) { if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { @@ -226,7 +226,7 @@ TEST_F(MklToTfConversionPass, Negative_DoubleInsert) { "node { name: 'D' op: '_MklToTf'" " attr { key: 'T' value { type: DT_FLOAT } }" " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['C:0', 'C:1']}" + " input: ['C:0', 'C:2']}" "node { name: 'E' op: 'Input'}" "node { name: 'F' op: 'Sub'" " attr {key: 'T' value { type: DT_FLOAT } }" @@ -234,7 +234,7 @@ TEST_F(MklToTfConversionPass, Negative_DoubleInsert) { EXPECT_EQ(DoRunMklToTfConversionPass(), "A(Input);B(Input);C(_MklConv2D);D(_MklToTf);E(Input);" "F(Sub);M(_MklInput);N(_MklInput)|" - "A->C;B->C:1;C->D;C:1->D:1;D->F;E->F:1;M->C:2;N->C:3"); + "A->C;B->C:1;C->D;C:2->D:1;D->F;E->F:1;M->C:2;N->C:3"); } } -- GitLab From fafb92fb954e6e516872985f58d9219941314fbd Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Thu, 10 Aug 2017 17:31:26 -0700 Subject: [PATCH 243/294] Fixed style errors manually rather using clang tool --- tensorflow/core/graph/mkl_layout_pass.cc | 3 --- .../core/kernels/mkl_conv_grad_input_ops.cc | 13 +++++----- tensorflow/core/kernels/mkl_conv_ops.cc | 25 +++++++++++-------- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index fbc676edcf..ad19cf6faa 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -256,7 +256,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { public: MklLayoutRewritePass() { // NOTE: names are alphabetically sorted. - csinfo_.add = "Add"; csinfo_.avg_pool = "AvgPool"; csinfo_.avg_pool_grad = "AvgPoolGrad"; csinfo_.bias_add = "BiasAdd"; @@ -430,7 +429,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { /// Structure to store all constant strings /// NOTE: names are alphabetically sorted. typedef struct { - string add; string avg_pool; string avg_pool_grad; string bias_add; @@ -577,7 +575,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { static bool AlwaysRewrite(const Node* n, const ContextInfo* c = nullptr) { return true; } - // Check if we are performing pooling on depth or batch. If it is, then we // do not rewrite MaxPool node to Mkl version. diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index 1024694be5..50700c8bc8 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -96,16 +96,17 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { OP_REQUIRES(context, mkl_context.filter_shape.GetDimension() == 4, errors::InvalidArgument( "Conv2DCustomBackpropInput: size must be 4-dim")); - - const int64* filter_sizes = (const int64*) mkl_context.filter_shape.GetSizes(); + + const int64* filter_sizes = + (const int64*) mkl_context.filter_shape.GetSizes(); const int64 filter_dims = mkl_context.filter_shape.GetDimension(); - - OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(filter_sizes, filter_dims, &filter_shape)); - + + OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(filter_sizes, + filter_dims, &filter_shape)); } else { filter_shape = filter.shape(); } - + // Generate shape for outback prop if input is in MKL format. if (outback_in_mkl_format) { OP_REQUIRES(context, mkl_context.outback_shape.GetDimension() == 4, diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index ee70544cee..0bff2ff238 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -265,26 +265,28 @@ class MklConv2DOp : public OpKernel { sizeof(T)); AllocateOutputSetMklShape(context, 0, &output, mkl_output_tf_shape, mkl_output_mkl_shape); - // Filter output to be used in the backprob_input TensorShape mkl_filter_output_tf_shape; MklShape mkl_filter_output_mkl_shape; mkl_filter_output_mkl_shape.SetMklTensor(true); - mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd, dnnResourceFilter); + mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd, + dnnResourceFilter); - size_t filter_sizes [4] = {filter.dim_size(0), filter.dim_size(1), filter.dim_size(2), filter.dim_size(3)}; + size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1), + filter.dim_size(2), filter.dim_size(3)}; mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes, mkl_context.filter_strides); - mkl_filter_output_mkl_shape.SetTfDimOrder(mkl_context.filter_dims, data_format_); + mkl_filter_output_mkl_shape.SetTfDimOrder(mkl_context.filter_dims, + data_format_); mkl_filter_output_tf_shape.AddDim( dnnLayoutGetMemorySize_F32( - static_cast(mkl_filter_output_mkl_shape.GetMklLayout())) / - sizeof(T)); - AllocateOutputSetMklShape(context, 1, &mkl_context.output_filter, mkl_filter_output_tf_shape, - mkl_filter_output_mkl_shape); - - + static_cast( + mkl_filter_output_mkl_shape.GetMklLayout())) / + sizeof(T)); + AllocateOutputSetMklShape(context, 1, &mkl_context.output_filter, + mkl_filter_output_tf_shape, mkl_filter_output_mkl_shape); + mkl_context.conv_res[dnnResourceDst] = static_cast(output->flat().data()); @@ -404,7 +406,8 @@ class MklConv2DOp : public OpKernel { CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_filter, lt_filter, mkl_lt_internal_filter), E_SUCCESS); - mkl_buf_convert_filter = const_cast(static_cast(output_filter->flat().data())); + mkl_buf_convert_filter = const_cast(static_cast( + output_filter->flat().data())); CHECK_EQ( dnnConversionExecute_F32(mkl_prim_convert_filter, mkl_buf_filter, mkl_buf_convert_filter), -- GitLab From 7bffb80bf3c6100a50ce497b582492888dbc110f Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Thu, 10 Aug 2017 17:44:02 -0700 Subject: [PATCH 244/294] Fixed typo --- tensorflow/core/kernels/mkl_conv_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 0bff2ff238..b50a6343ba 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -265,7 +265,7 @@ class MklConv2DOp : public OpKernel { sizeof(T)); AllocateOutputSetMklShape(context, 0, &output, mkl_output_tf_shape, mkl_output_mkl_shape); - // Filter output to be used in the backprob_input + // Filter output to be used in the backprop_input TensorShape mkl_filter_output_tf_shape; MklShape mkl_filter_output_mkl_shape; mkl_filter_output_mkl_shape.SetMklTensor(true); -- GitLab From a8962b417703503a3873eb35d763f6ae225aabd4 Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Fri, 25 Aug 2017 16:44:03 -0700 Subject: [PATCH 245/294] Addressing review comments for layout pass --- tensorflow/core/graph/mkl_layout_pass.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index ad19cf6faa..4c79323197 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -1099,8 +1099,8 @@ int MklLayoutRewritePass::SetUpContiguousInputs( CHECK_NOTNULL(workspace_tensors); CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - // Temporary solution to connect filter input of BackpropInput with the - // converted filter from Conv2D. + // TODO(nhasabni): Temporary solution to connect filter input of + // BackpropInput with the converted filter from Conv2D. bool do_connect_conv2d_backprop_input_filter = false; Node* conv2d_node = nullptr; // Filter node is 2nd input (slot index 1) of Conv2D. @@ -1128,15 +1128,13 @@ int MklLayoutRewritePass::SetUpContiguousInputs( // We will not connect filter input of Conv2DBackpropInput // to be safe here. do_connect_conv2d_backprop_input_filter = false; + break; } else { conv2d_node = e->dst(); + do_connect_conv2d_backprop_input_filter = true; } } } - - if (conv2d_node != nullptr) { - do_connect_conv2d_backprop_input_filter = true; - } } // Number of input slots to original op -- GitLab From a1f2caa624548c7cb16c21ec62d7c7dabf0bc2c8 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 28 Aug 2017 07:23:46 -0700 Subject: [PATCH 246/294] delete get_started/monitors.md --- tensorflow/docs_src/get_started/leftnav_files | 1 - tensorflow/docs_src/get_started/monitors.md | 406 ------------------ 2 files changed, 407 deletions(-) delete mode 100644 tensorflow/docs_src/get_started/monitors.md diff --git a/tensorflow/docs_src/get_started/leftnav_files b/tensorflow/docs_src/get_started/leftnav_files index b656033f7e..bb67eaddda 100644 --- a/tensorflow/docs_src/get_started/leftnav_files +++ b/tensorflow/docs_src/get_started/leftnav_files @@ -5,7 +5,6 @@ mnist/pros.md mnist/mechanics.md estimator.md input_fn.md -monitors.md summaries_and_tensorboard.md graph_viz.md tensorboard_histograms.md diff --git a/tensorflow/docs_src/get_started/monitors.md b/tensorflow/docs_src/get_started/monitors.md deleted file mode 100644 index 5606e95365..0000000000 --- a/tensorflow/docs_src/get_started/monitors.md +++ /dev/null @@ -1,406 +0,0 @@ -# Logging and Monitoring Basics with tf.contrib.learn - -When training a model, it’s often valuable to track and evaluate progress in -real time. In this tutorial, you’ll learn how to use TensorFlow’s logging -capabilities and the `Monitor` API to audit the in-progress training of a neural -network classifier for categorizing irises. This tutorial builds on the code -developed in @{$estimator$tf.estimator Quickstart} so if you -haven't yet completed that tutorial, you may want to explore it first, -especially if you're looking for an intro/refresher on tf.contrib.learn basics. - -## Setup {#setup} - -For this tutorial, you'll be building upon the following code from -@{$estimator$tf.estimator Quickstart}: - -```python -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import numpy as np -import tensorflow as tf - -# Data sets -IRIS_TRAINING = os.path.join(os.path.dirname(__file__), "iris_training.csv") -IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv") - -def main(unused_argv): - # Load datasets. - training_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32) - test_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32) - - # Specify that all features have real-value data - feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)] - - # Build 3 layer DNN with 10, 20, 10 units respectively. - classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns, - hidden_units=[10, 20, 10], - n_classes=3, - model_dir="/tmp/iris_model") - - # Fit model. - classifier.fit(x=training_set.data, - y=training_set.target, - steps=2000) - - # Evaluate accuracy. - accuracy_score = classifier.evaluate(x=test_set.data, - y=test_set.target)["accuracy"] - print('Accuracy: {0:f}'.format(accuracy_score)) - - # Classify two new flower samples. - new_samples = np.array( - [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float) - y = list(classifier.predict(new_samples, as_iterable=True)) - print('Predictions: {}'.format(str(y))) - -if __name__ == "__main__": - tf.app.run() -``` - -Copy the above code into a file, and download the corresponding -[training](http://download.tensorflow.org/data/iris_training.csv) and -[test](http://download.tensorflow.org/data/iris_test.csv) data sets to the same -directory. - -In the following sections, you'll progressively make updates to the above code -to add logging and monitoring capabilities. Final code incorporating all updates -is [available for download -here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/monitors/iris_monitors.py). - -## Overview - -The @{$estimator$tf.estimator Quickstart tutorial} walked through -how to implement a neural net classifier to categorize iris examples into one of -three species. - -But when [the code](#setup) from this tutorial is run, the output contains no -logging tracking how model training is progressing—only the results of the -`print` statements that were included: - -```none -Accuracy: 0.933333 -Predictions: [1 2] -``` - -Without any logging, model training feels like a bit of a black box; you can't -see what's happening as TensorFlow steps through gradient descent, get a sense -of whether the model is converging appropriately, or audit to determine whether -[early stopping](https://en.wikipedia.org/wiki/Early_stopping) might be -appropriate. - -One way to address this problem would be to split model training into multiple -`fit` calls with smaller numbers of steps in order to evaluate accuracy more -progressively. However, this is not recommended practice, as it greatly slows -down model training. Fortunately, tf.contrib.learn offers another solution: a -@{tf.contrib.learn.monitors$Monitor API} designed to help -you log metrics and evaluate your model while training is in progress. In the -following sections, you'll learn how to enable logging in TensorFlow, set up a -ValidationMonitor to do streaming evaluations, and visualize your metrics using -TensorBoard. - -## Enabling Logging with TensorFlow - -TensorFlow uses five different levels for log messages. In order of ascending -severity, they are `DEBUG`, `INFO`, `WARN`, `ERROR`, and `FATAL`. When you -configure logging at any of these levels, TensorFlow will output all log -messages corresponding to that level and all levels of higher severity. For -example, if you set a logging level of `ERROR`, you'll get log output containing -`ERROR` and `FATAL` messages, and if you set a level of `DEBUG`, you'll get log -messages from all five levels. - -By default, TensorFlow is configured at a logging level of `WARN`, but when -tracking model training, you'll want to adjust the level to `INFO`, which will -provide additional feedback as `fit` operations are in progress. - -Add the following line to the beginning of your code (right after your -`import`s): - -```python -tf.logging.set_verbosity(tf.logging.INFO) -``` - -Now when you run the code, you'll see additional log output like the following: - -```none -INFO:tensorflow:loss = 1.18812, step = 1 -INFO:tensorflow:loss = 0.210323, step = 101 -INFO:tensorflow:loss = 0.109025, step = 201 -``` - -With `INFO`-level logging, tf.contrib.learn automatically outputs [training-loss -metrics](https://en.wikipedia.org/wiki/Loss_function) to stderr after every 100 -steps. - -## Configuring a ValidationMonitor for Streaming Evaluation - -Logging training loss is helpful to get a sense whether your model is -converging, but what if you want further insight into what's happening during -training? tf.contrib.learn provides several high-level `Monitor`s you can attach -to your `fit` operations to further track metrics and/or debug lower-level -TensorFlow operations during model training, including: - -Monitor | Description -------------------- | ----------- -`CaptureVariable` | Saves a specified variable's values into a collection at every _n_ steps of training -`PrintTensor` | Logs a specified tensor's values at every _n_ steps of training -`SummarySaver` | Saves @{tf.Summary} [protocol buffers](https://developers.google.com/protocol-buffers/) for a given tensor using a @{tf.summary.FileWriter} at every _n_ steps of training -`ValidationMonitor` | Logs a specified set of evaluation metrics at every _n_ steps of training, and, if desired, implements early stopping under certain conditions - -### Evaluating Every *N* Steps - -For the iris neural network classifier, while logging training loss, you might -also want to simultaneously evaluate against test data to see how well the model -is generalizing. You can accomplish this by configuring a `ValidationMonitor` -with the test data (`test_set.data` and `test_set.target`), and setting how -often to evaluate with `every_n_steps`. The default value of `every_n_steps` is -`100`; here, set `every_n_steps` to `50` to evaluate after every 50 steps of -model training: - -```python -validation_monitor = tf.contrib.learn.monitors.ValidationMonitor( - test_set.data, - test_set.target, - every_n_steps=50) -``` - -Place this code right before the line instantiating the `classifier`. - -`ValidationMonitor`s rely on saved checkpoints to perform evaluation operations, -so you'll want to modify instantiation of the `classifier` to add a -@{tf.contrib.learn.RunConfig} that includes -`save_checkpoints_secs`, which specifies how many seconds should elapse between -checkpoint saves during training. Because the iris data set is quite small, and -thus trains quickly, it makes sense to set `save_checkpoints_secs` to 1 (saving -a checkpoint every second) to ensure a sufficient number of checkpoints: - -```python -classifier = tf.contrib.learn.DNNClassifier( - feature_columns=feature_columns, - hidden_units=[10, 20, 10], - n_classes=3, - model_dir="/tmp/iris_model", - config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1)) -``` - -NOTE: The `model_dir` parameter specifies an explicit directory -(`/tmp/iris_model`) for model data to be stored; this directory path will be -easier to reference later on than an autogenerated one. Each time you run the -code, any existing data in `/tmp/iris_model` will be loaded, and model training -will continue where it left off in the last run (e.g., running the script twice -in succession will execute 4000 steps during training—2000 during each -`fit` operation). To start over model training from scratch, delete -`/tmp/iris_model` before running the code. - -Finally, to attach your `validation_monitor`, update the `fit` call to include a -`monitors` param, which takes a list of all monitors to run during model -training: - -```python -classifier.fit(x=training_set.data, - y=training_set.target, - steps=2000, - monitors=[validation_monitor]) -``` - -Now, when you rerun the code, you should see validation metrics in your log -output, e.g.: - -```none -INFO:tensorflow:Validation (step 50): loss = 1.71139, global_step = 0, accuracy = 0.266667 -... -INFO:tensorflow:Validation (step 300): loss = 0.0714158, global_step = 268, accuracy = 0.966667 -... -INFO:tensorflow:Validation (step 1750): loss = 0.0574449, global_step = 1729, accuracy = 0.966667 -``` - -### Customizing the Evaluation Metrics with MetricSpec - -By default, if no evaluation metrics are specified, `ValidationMonitor` will log -both [loss](https://en.wikipedia.org/wiki/Loss_function) and accuracy, but you -can customize the list of metrics that will be run every 50 steps. To specify -the exact metrics you'd like to run in each evaluation pass, you can add a -`metrics` param to the `ValidationMonitor` constructor. `metrics` takes a dict -of key/value pairs, where each key is the name you'd like logged for the metric, -and the corresponding value is a -[`MetricSpec`](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn/metric_spec.py) -object. - -The `MetricSpec` constructor accepts four parameters: - -* `metric_fn`. The function that calculates and returns the value of a metric. - This can be a predefined function available in the - @{tf.contrib.metrics} module, such as - @{tf.contrib.metrics.streaming_precision} or - @{tf.contrib.metrics.streaming_recall}. - - Alternatively, you can define your own custom metric function, which must - take `predictions` and `labels` tensors as arguments (a `weights` argument - can also optionally be supplied). The function must return the value of the - metric in one of two formats: - - * A single tensor - * A pair of ops `(value_op, update_op)`, where `value_op` returns the - metric value and `update_op` performs a corresponding operation to - update internal model state. - -* `prediction_key`. The key of the tensor containing the predictions returned - by the model. This argument may be omitted if the model returns either a - single tensor or a dict with a single entry. For a `DNNClassifier` model, - class predictions will be returned in a tensor with the key - @{tf.contrib.learn.PredictionKey.CLASSES}. - -* `label_key`. The key of the tensor containing the labels returned by the - model, as specified by the model's @{$input_fn$`input_fn`}. As - with `prediction_key`, this argument may be omitted if the `input_fn` - returns either a single tensor or a dict with a single entry. In the iris - example in this tutorial, the `DNNClassifier` does not have an `input_fn` - (`x`,`y` data is passed directly to `fit`), so it's not necessary to provide - a `label_key`. - -* `weights_key`. *Optional*. The key of the tensor (returned by the - @{$input_fn$`input_fn`}) containing weights inputs for the - `metric_fn`. - -The following code creates a `validation_metrics` dict that defines three -metrics to log during model evaluation: - -* `"accuracy"`, using @{tf.contrib.metrics.streaming_accuracy} - as the `metric_fn` -* `"precision"`, using @{tf.contrib.metrics.streaming_precision} - as the `metric_fn` -* `"recall"`, using @{tf.contrib.metrics.streaming_recall} - as the `metric_fn` - -```python -validation_metrics = { - "accuracy": - tf.contrib.learn.MetricSpec( - metric_fn=tf.contrib.metrics.streaming_accuracy, - prediction_key=tf.contrib.learn.PredictionKey.CLASSES), - "precision": - tf.contrib.learn.MetricSpec( - metric_fn=tf.contrib.metrics.streaming_precision, - prediction_key=tf.contrib.learn.PredictionKey.CLASSES), - "recall": - tf.contrib.learn.MetricSpec( - metric_fn=tf.contrib.metrics.streaming_recall, - prediction_key=tf.contrib.learn.PredictionKey.CLASSES) -} -``` - -Add the above code before the `ValidationMonitor` constructor. Then revise the -`ValidationMonitor` constructor as follows to add a `metrics` parameter to log -the accuracy, precision, and recall metrics specified in `validation_metrics` -(loss is always logged, and doesn't need to be explicitly specified): - -```python -validation_monitor = tf.contrib.learn.monitors.ValidationMonitor( - test_set.data, - test_set.target, - every_n_steps=50, - metrics=validation_metrics) -``` - -Rerun the code, and you should see precision and recall included in your log -output, e.g.: - -```none -INFO:tensorflow:Validation (step 50): recall = 0.0, loss = 1.20626, global_step = 1, precision = 0.0, accuracy = 0.266667 -... -INFO:tensorflow:Validation (step 600): recall = 1.0, loss = 0.0530696, global_step = 571, precision = 1.0, accuracy = 0.966667 -... -INFO:tensorflow:Validation (step 1500): recall = 1.0, loss = 0.0617403, global_step = 1452, precision = 1.0, accuracy = 0.966667 -``` - -### Early Stopping with ValidationMonitor - -Note that in the above log output, by step 600, the model has already achieved -precision and recall rates of 1.0. This raises the question as to whether model -training could benefit from -[early stopping](https://en.wikipedia.org/wiki/Early_stopping). - -In addition to logging eval metrics, `ValidationMonitor`s make it easy to -implement early stopping when specified conditions are met, via three params: - -| Param | Description | -| -------------------------------- | ----------------------------------------- | -| `early_stopping_metric` | Metric that triggers early stopping | -: : (e.g., loss or accuracy) under conditions : -: : specified in `early_stopping_rounds` and : -: : `early_stopping_metric_minimize`. Default : -: : is `"loss"`. : -| `early_stopping_metric_minimize` | `True` if desired model behavior is to | -: : minimize the value of : -: : `early_stopping_metric`; `False` if : -: : desired model behavior is to maximize the : -: : value of `early_stopping_metric`. Default : -: : is `True`. : -| `early_stopping_rounds` | Sets a number of steps during which if | -: : the `early_stopping_metric` does not : -: : decrease (if : -: : `early_stopping_metric_minimize` is : -: : `True`) or increase (if : -: : `early_stopping_metric_minimize` is : -: : `False`), training will be stopped. : -: : Default is `None`, which means early : -: : stopping will never occur. : - -Make the following revision to the `ValidationMonitor` constructor, which -specifies that if loss (`early_stopping_metric="loss"`) does not decrease -(`early_stopping_metric_minimize=True`) over a period of 200 steps -(`early_stopping_rounds=200`), model training will stop immediately at that -point, and not complete the full 2000 steps specified in `fit`: - -```python -validation_monitor = tf.contrib.learn.monitors.ValidationMonitor( - test_set.data, - test_set.target, - every_n_steps=50, - metrics=validation_metrics, - early_stopping_metric="loss", - early_stopping_metric_minimize=True, - early_stopping_rounds=200) -``` - -Rerun the code to see if model training stops early: - -```none -... -INFO:tensorflow:Validation (step 1150): recall = 1.0, loss = 0.056436, global_step = 1119, precision = 1.0, accuracy = 0.966667 -INFO:tensorflow:Stopping. Best step: 800 with loss = 0.048313818872. -``` - -Indeed, here training stops at step 1150, indicating that for the past 200 -steps, loss did not decrease, and that overall, step 800 produced the smallest -loss value against the test data set. This suggests that additional calibration -of hyperparameters by decreasing the step count might further improve the model. - -## Visualizing Log Data with TensorBoard - -Reading through the log produced by `ValidationMonitor` provides plenty of raw -data on model performance during training, but it may also be helpful to see -visualizations of this data to get further insight into trends—for -example, how accuracy is changing over step count. You can use TensorBoard (a -separate program packaged with TensorFlow) to plot graphs like this by setting -the `logdir` command-line argument to the directory where you saved your model -training data (here, `/tmp/iris_model`). Run the following on your command line: - -
$ tensorboard --logdir=/tmp/iris_model/
-Starting TensorBoard 39 on port 6006
- -Then navigate to `http://0.0.0.0:`*``* in your browser, where -*``* is the port specified in the command-line output (here, -`6006`). - -If you click on the accuracy field, you'll see an image like the following, -which shows accuracy plotted against step count: - -![Accuracy over step count in TensorBoard](https://www.tensorflow.org/images/validation_monitor_tensorboard_accuracy.png "Accuracy over step count in TensorBoard") - -For more on using TensorBoard, see @{$summaries_and_tensorboard$TensorBoard: Visualizing Learning} and @{$graph_viz$TensorBoard: Graph Visualization}. -- GitLab From d520e4f4f617b2cbd5ebe1bc7f97d4a442ab5d30 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 28 Aug 2017 07:54:25 -0700 Subject: [PATCH 247/294] clear references to deleted doc --- tensorflow/docs_src/get_started/estimator.md | 8 ++------ tensorflow/docs_src/get_started/index.md | 2 -- tensorflow/docs_src/get_started/input_fn.md | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/tensorflow/docs_src/get_started/estimator.md b/tensorflow/docs_src/get_started/estimator.md index a55454f8af..4f3a438d17 100644 --- a/tensorflow/docs_src/get_started/estimator.md +++ b/tensorflow/docs_src/get_started/estimator.md @@ -273,9 +273,7 @@ Then, the code creates a `DNNClassifier` model using the following arguments: containing 10, 20, and 10 neurons, respectively. * `n_classes=3`. Three target classes, representing the three Iris species. * `model_dir=/tmp/iris_model`. The directory in which TensorFlow will save - checkpoint data during model training. For more on logging and monitoring - with TensorFlow, see - @{$monitors$Logging and Monitoring Basics with tf.estimator}. + checkpoint data and TensorBoard summaries during model training. ## Describe the training input pipeline {#train-input} @@ -315,9 +313,7 @@ classifier.train(input_fn=train_input_fn, steps=1000) However, if you're looking to track the model while it trains, you'll likely want to instead use a TensorFlow @{tf.train.SessionRunHook$`SessionRunHook`} -to perform logging operations. See the tutorial -@{$monitors$Logging and Monitoring Basics with tf.estimator} -for more on this topic. +to perform logging operations. ## Evaluate Model Accuracy {#evaluate-accuracy} diff --git a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md index 3e700daa30..003fac1a28 100644 --- a/tensorflow/docs_src/get_started/index.md +++ b/tensorflow/docs_src/get_started/index.md @@ -24,8 +24,6 @@ To learn about the high-level API, read the following guides: API. * @{$get_started/input_fn$Building Input Functions}, which takes you into a somewhat more sophisticated use of this API. - * @{$get_started/monitors$Logging and Monitoring Basics with tf.contrib.learn}, - which explains how to audit the progress of model training. TensorBoard is a utility to visualize different aspects of machine learning. The following guides explain how to use TensorBoard: diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md index 422f45c586..7706c07b1d 100644 --- a/tensorflow/docs_src/get_started/input_fn.md +++ b/tensorflow/docs_src/get_started/input_fn.md @@ -249,7 +249,7 @@ here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/input_fn/bos ### Importing the Housing Data -To start, set up your imports (including `pandas` and `tensorflow`) and @{$monitors#enabling-logging-with-tensorflow$set logging verbosity} to +To start, set up your imports (including `pandas` and `tensorflow`) and set logging verbosity to `INFO` for more detailed log output: ```python -- GitLab From b808f1274aab4560ae84a1363acca44fd6e86b54 Mon Sep 17 00:00:00 2001 From: Dan Jarvis Date: Sun, 27 Aug 2017 17:49:01 -0400 Subject: [PATCH 248/294] Removing slightly confusing ">" from output --- tensorflow/python/tools/import_pb_to_tensorboard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py index a8712fc37e..00de044505 100644 --- a/tensorflow/python/tools/import_pb_to_tensorboard.py +++ b/tensorflow/python/tools/import_pb_to_tensorboard.py @@ -51,7 +51,7 @@ def import_to_tensorboard(model_dir, log_dir): pb_visual_writer = summary.FileWriter(log_dir) pb_visual_writer.add_graph(sess.graph) print("Model Imported. Visualize by running: " - "> tensorboard --logdir={}".format(log_dir)) + "tensorboard --logdir={}".format(log_dir)) def main(unused_args): -- GitLab From 0cfd119fad0534588ab45386b45d8514bdd3cc00 Mon Sep 17 00:00:00 2001 From: Qingqing Cao Date: Fri, 25 Aug 2017 12:23:55 +0100 Subject: [PATCH 249/294] fix: profiler bazel build usage --- tensorflow/core/profiler/g3doc/command_line.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/profiler/g3doc/command_line.md b/tensorflow/core/profiler/g3doc/command_line.md index e2839a682f..fb4207c784 100644 --- a/tensorflow/core/profiler/g3doc/command_line.md +++ b/tensorflow/core/profiler/g3doc/command_line.md @@ -57,7 +57,7 @@ Note: this feature is not well maintained now. ```shell # Build the tool. -bazel build --config opt third_party/tensorflow/core/profiler/... +bazel build --config opt tensorflow/core/profiler:profiler # Help information, including detail 'option' instructions. bazel-bin/tensorflow/core/profiler/profiler help -- GitLab From 01d5dff324744a3089b5096e9dd9f8fdc3bede6e Mon Sep 17 00:00:00 2001 From: Penghao Cen Date: Fri, 25 Aug 2017 12:50:48 +0800 Subject: [PATCH 250/294] Double check after regex search for cudnn_path --- configure.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configure.py b/configure.py index 186fdc9ddc..b49cdf2383 100644 --- a/configure.py +++ b/configure.py @@ -685,7 +685,9 @@ def set_tf_cunn_version(environ_cp): ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' cudnn_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) cudnn_path_from_ldconfig = re.search('.*libcudnn.so .* => (.*)', - cudnn_path_from_ldconfig).group(1) + cudnn_path_from_ldconfig) + if cudnn_path_from_ldconfig != None: + cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1) if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)): cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig) break -- GitLab From cf81d8e60761fa4c0e406f71daaa4040eae4b8ee Mon Sep 17 00:00:00 2001 From: Penghao Cen Date: Tue, 29 Aug 2017 09:50:29 +0800 Subject: [PATCH 251/294] Move path.exists if block inside None check --- configure.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configure.py b/configure.py index b49cdf2383..fcf359d061 100644 --- a/configure.py +++ b/configure.py @@ -686,11 +686,11 @@ def set_tf_cunn_version(environ_cp): cudnn_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) cudnn_path_from_ldconfig = re.search('.*libcudnn.so .* => (.*)', cudnn_path_from_ldconfig) - if cudnn_path_from_ldconfig != None: + if cudnn_path_from_ldconfig: cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1) - if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)): - cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig) - break + if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)): + cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig) + break # Reset and Retry print( -- GitLab From 97ee15e8b61c029782f2e680223411c2bf1efaad Mon Sep 17 00:00:00 2001 From: Eric Liu Date: Tue, 5 Sep 2017 09:08:45 -0700 Subject: [PATCH 252/294] [tpu:profiler] Refactor profile dumping logic into a library. PiperOrigin-RevId: 167586829 --- tensorflow/contrib/tpu/profiler/BUILD | 21 ++- .../tpu/profiler/capture_tpu_profile.cc | 133 +------------- .../contrib/tpu/profiler/dump_tpu_profile.cc | 164 ++++++++++++++++++ .../contrib/tpu/profiler/dump_tpu_profile.h | 38 ++++ 4 files changed, 223 insertions(+), 133 deletions(-) create mode 100644 tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc create mode 100644 tensorflow/contrib/tpu/profiler/dump_tpu_profile.h diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD index 9157c79a90..a567d1bbb0 100644 --- a/tensorflow/contrib/tpu/profiler/BUILD +++ b/tensorflow/contrib/tpu/profiler/BUILD @@ -14,19 +14,30 @@ tf_proto_library_cc( visibility = ["//visibility:public"], ) -cc_binary( - name = "capture_tpu_profile", - srcs = ["capture_tpu_profile.cc"], - visibility = ["//tensorflow/contrib/tpu/profiler:__subpackages__"], +cc_library( + name = "dump_tpu_profile", + srcs = ["dump_tpu_profile.cc"], + hdrs = ["dump_tpu_profile.h"], deps = [ ":op_profile_proto_cc", ":tpu_profiler_proto_cc", ":trace_events_proto_cc", ":trace_events_to_json", "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + ], +) + +cc_binary( + name = "capture_tpu_profile", + srcs = ["capture_tpu_profile.cc"], + visibility = ["//tensorflow/contrib/tpu/profiler:__subpackages__"], + deps = [ + ":dump_tpu_profile", + ":tpu_profiler_proto_cc", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", "//tensorflow/core/distributed_runtime/rpc:grpc_util", "//tensorflow/core/platform/cloud:gcs_file_system", "@grpc//:grpc++_unsecure", diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index db77b3fa90..5b51a72ece 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -24,24 +24,12 @@ limitations under the License. #include #include -#include "tensorflow/contrib/tpu/profiler/op_profile.pb.h" +#include "tensorflow/contrib/tpu/profiler/dump_tpu_profile.h" #include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h" -#include "tensorflow/contrib/tpu/profiler/trace_events.pb.h" -#include "tensorflow/contrib/tpu/profiler/trace_events_to_json.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" -#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/io/compression.h" -#include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/lib/strings/strcat.h" -#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" -#include "tensorflow/core/platform/protobuf.h" -#include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/util/command_line_flags.h" -#include "tensorflow/core/util/event.pb.h" -#include "tensorflow/core/util/events_writer.h" namespace tensorflow { namespace tpu { @@ -49,16 +37,6 @@ namespace { using ::tensorflow::TPUProfiler; -using ::grpc::ClientContext; -using ::tensorflow::io::JoinPath; -using ::tensorflow::protobuf::util::JsonOptions; -using ::tensorflow::protobuf::util::MessageToJsonString; - -constexpr char kProfilePluginDirectory[] = "plugins/profile/"; -constexpr char kJsonOpProfileFileName[] = "op_profile.json"; -constexpr char kProtoTraceFileName[] = "trace"; -constexpr char kJsonTraceFileName[] = "trace.json.gz"; -constexpr char kGraphRunPrefix[] = "tpu_profiler.hlo_graph."; constexpr uint64 kMaxEvents = 1000000; string GetCurrentTimeStampAsString() { @@ -68,65 +46,13 @@ string GetCurrentTimeStampAsString() { return s; } -Status WriteGzippedDataToFile(const string& filename, const string& data) { - std::unique_ptr file; - TF_RETURN_IF_ERROR(Env::Default()->NewWritableFile(filename, &file)); - io::ZlibCompressionOptions options = io::ZlibCompressionOptions::GZIP(); - io::ZlibOutputBuffer buffer(file.get(), options.input_buffer_size, - options.output_buffer_size, options); - TF_RETURN_IF_ERROR(buffer.Init()); - TF_RETURN_IF_ERROR(buffer.Append(data)); - TF_RETURN_IF_ERROR(buffer.Close()); - TF_RETURN_IF_ERROR(file->Close()); - return Status::OK(); -} - -// Dumps profile data to /plugins/profile//. -inline string CreateProfileRunDirectory(const string& logdir, - const string& run) { - string run_dir = JoinPath(logdir, kProfilePluginDirectory, run); - TF_CHECK_OK(Env::Default()->RecursivelyCreateDir(run_dir)); - return run_dir; -} - -void DumpTraceToLogDirectory(StringPiece run_dir, const string& encoded_trace) { - string proto_path = JoinPath(run_dir, kProtoTraceFileName); - TF_CHECK_OK(WriteStringToFile(Env::Default(), proto_path, encoded_trace)); - LOG(INFO) << "Dumped raw-proto trace data to " << proto_path; - - string json_path = JoinPath(run_dir, kJsonTraceFileName); - Trace trace; - trace.ParseFromString(encoded_trace); - std::cout << "Trace contains " << trace.trace_events_size() << " events." - << std::endl; - TF_CHECK_OK(WriteGzippedDataToFile(json_path, TraceEventsToJson(trace))); - std::cout << "Dumped JSON trace data to " << json_path << std::endl; -} - -void DumpOpProfileToLogDirectory(StringPiece run_dir, - const tpu::op_profile::Profile& profile) { - string path = JoinPath(run_dir, kJsonOpProfileFileName); - string json; - JsonOptions options; - options.always_print_primitive_fields = true; - auto status = MessageToJsonString(profile, &json, options); - if (!status.ok()) { - std::cerr << "Failed to convert op profile to json. Skipping... " - << status.error_message() << std::endl; - return; - } - TF_CHECK_OK(WriteStringToFile(Env::Default(), path, json)); - std::cout << "Dumped json op profile data to " << path << std::endl; -} - ProfileResponse Profile(const string& service_addr, int duration_ms) { ProfileRequest request; request.set_duration_ms(duration_ms); request.set_max_events(kMaxEvents); std::cout << "Limiting the number of trace events to " << kMaxEvents << std::endl; - ProfileResponse response; - ClientContext context; + ::grpc::ClientContext context; ::grpc::ChannelArguments channel_args; // TODO(ioeric): use `SetMaxReceiveMessageSize` instead once it's available. channel_args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH, @@ -134,45 +60,11 @@ ProfileResponse Profile(const string& service_addr, int duration_ms) { std::unique_ptr stub = TPUProfiler::NewStub(::grpc::CreateCustomChannel( service_addr, ::grpc::InsecureChannelCredentials(), channel_args)); + ProfileResponse response; TF_QCHECK_OK(FromGrpcStatus(stub->Profile(&context, request, &response))); return response; } -void DumpGraphEvents(const string& logdir, const string& run, - const ProfileResponse& response) { - int num_graphs = response.computation_graph_size(); - if (response.computation_graph_size() == 0) return; - // The server might generates multiple graphs for one program; we simply - // pick the first one. - if (num_graphs > 1) { - std::cout << num_graphs - << " TPU program variants observed over the profiling period. " - << "One computation graph will be chosen arbitrarily." - << std::endl; - } - // The graph plugin expects the graph in //. - string run_dir = JoinPath(logdir, strings::StrCat(kGraphRunPrefix, run)); - TF_CHECK_OK(Env::Default()->RecursivelyCreateDir(run_dir)); - EventsWriter event_writer(JoinPath(run_dir, "events")); - Event event; - // Add the computation graph. - event.set_graph_def(response.computation_graph(0).SerializeAsString()); - event_writer.WriteEvent(event); - std::cout << "Wrote a HLO graph to " << event_writer.FileName() << std::endl; - - if (response.has_hlo_metadata()) { - tensorflow::TaggedRunMetadata tagged_run_metadata; - tagged_run_metadata.set_tag(run); - tagged_run_metadata.set_run_metadata( - response.hlo_metadata().SerializeAsString()); - tensorflow::Event meta_event; - *meta_event.mutable_tagged_run_metadata() = tagged_run_metadata; - event_writer.WriteEvent(meta_event); - std::cout << "Wrote HLO ops run metadata to " << event_writer.FileName() - << std::endl; - } -} - } // namespace } // namespace tpu } // namespace tensorflow @@ -203,23 +95,8 @@ int main(int argc, char** argv) { tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms); // Use the current timestamp as the run name. tensorflow::string run = tensorflow::tpu::GetCurrentTimeStampAsString(); - tensorflow::string run_dir = - tensorflow::tpu::CreateProfileRunDirectory(FLAGS_logdir, run); - // Ignore computation_graph for now. - if (response.encoded_trace().empty()) { - std::cout << "No trace event is collected during the " << duration_ms - << "ms interval." << std::endl; - } else { - LOG(INFO) << "Converting trace events to TraceViewer JSON."; - tensorflow::tpu::DumpTraceToLogDirectory(run_dir, response.encoded_trace()); - } - tensorflow::tpu::DumpGraphEvents(FLAGS_logdir, run, response); - if (response.has_op_profile() && - (response.op_profile().has_by_program_structure() || - response.op_profile().has_by_category())) { - tensorflow::tpu::DumpOpProfileToLogDirectory(run_dir, - response.op_profile()); - } + TF_CHECK_OK(tensorflow::tpu::WriteTensorboardTPUProfile( + FLAGS_logdir, run, response, &std::cout)); // Print this at the end so that it's not buried in irrelevant LOG messages. std::cout << "NOTE: using the trace duration " << duration_ms << "ms." << std::endl diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc new file mode 100644 index 0000000000..7541a5291d --- /dev/null +++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc @@ -0,0 +1,164 @@ +/* Copyright 2017 The TensorFlow Authors All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tpu/profiler/dump_tpu_profile.h" + +#include +#include +#include + +#include "tensorflow/contrib/tpu/profiler/op_profile.pb.h" +#include "tensorflow/contrib/tpu/profiler/trace_events.pb.h" +#include "tensorflow/contrib/tpu/profiler/trace_events_to_json.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/io/compression.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/protobuf/config.pb.h" +#include "tensorflow/core/util/event.pb.h" +#include "tensorflow/core/util/events_writer.h" + +namespace tensorflow { +namespace tpu { +namespace { + +using ::tensorflow::io::JoinPath; +using ::tensorflow::protobuf::util::JsonOptions; +using ::tensorflow::protobuf::util::MessageToJsonString; + +constexpr char kProfilePluginDirectory[] = "plugins/profile/"; +constexpr char kJsonOpProfileFileName[] = "op_profile.json"; +constexpr char kProtoTraceFileName[] = "trace"; +constexpr char kJsonTraceFileName[] = "trace.json.gz"; +constexpr char kGraphRunPrefix[] = "tpu_profiler.hlo_graph."; + +Status WriteGzippedDataToFile(const string& filename, const string& data) { + std::unique_ptr file; + TF_RETURN_IF_ERROR(Env::Default()->NewWritableFile(filename, &file)); + io::ZlibCompressionOptions options = io::ZlibCompressionOptions::GZIP(); + io::ZlibOutputBuffer buffer(file.get(), options.input_buffer_size, + options.output_buffer_size, options); + TF_RETURN_IF_ERROR(buffer.Init()); + TF_RETURN_IF_ERROR(buffer.Append(data)); + TF_RETURN_IF_ERROR(buffer.Close()); + TF_RETURN_IF_ERROR(file->Close()); + return Status::OK(); +} + +Status DumpTraceToLogDirectory(StringPiece run_dir, const string& encoded_trace, + std::ostream* os) { + string proto_path = JoinPath(run_dir, kProtoTraceFileName); + TF_RETURN_IF_ERROR( + WriteStringToFile(Env::Default(), proto_path, encoded_trace)); + LOG(INFO) << "Dumped raw-proto trace data to " << proto_path; + + string json_path = JoinPath(run_dir, kJsonTraceFileName); + Trace trace; + trace.ParseFromString(encoded_trace); + *os << "Trace contains " << trace.trace_events_size() << " events." + << std::endl; + TF_RETURN_IF_ERROR( + WriteGzippedDataToFile(json_path, TraceEventsToJson(trace))); + *os << "Dumped JSON trace data to " << json_path << std::endl; + return Status::OK(); +} + +Status DumpOpProfileToLogDirectory(StringPiece run_dir, + const tpu::op_profile::Profile& profile, + std::ostream* os) { + string path = JoinPath(run_dir, kJsonOpProfileFileName); + string json; + JsonOptions options; + options.always_print_primitive_fields = true; + auto status = MessageToJsonString(profile, &json, options); + if (!status.ok()) { + return errors::Internal( + "Failed to convert op profile to json. Skipping... ", + string(status.error_message())); + } + TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, json)); + *os << "Dumped json op profile data to " << path << std::endl; + return Status::OK(); +} + +Status DumpGraphEvents(const string& logdir, const string& run, + const ProfileResponse& response, std::ostream* os) { + int num_graphs = response.computation_graph_size(); + if (response.computation_graph_size() == 0) return Status::OK(); + // The server might generates multiple graphs for one program; we simply + // pick the first one. + if (num_graphs > 1) { + *os << num_graphs + << " TPU program variants observed over the profiling period. " + << "One computation graph will be chosen arbitrarily." << std::endl; + } + // The graph plugin expects the graph in //. + string run_dir = JoinPath(logdir, strings::StrCat(kGraphRunPrefix, run)); + TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(run_dir)); + EventsWriter event_writer(JoinPath(run_dir, "events")); + Event event; + // Add the computation graph. + event.set_graph_def(response.computation_graph(0).SerializeAsString()); + event_writer.WriteEvent(event); + *os << "Wrote a HLO graph to " << event_writer.FileName() << std::endl; + + if (response.has_hlo_metadata()) { + tensorflow::TaggedRunMetadata tagged_run_metadata; + tagged_run_metadata.set_tag(run); + tagged_run_metadata.set_run_metadata( + response.hlo_metadata().SerializeAsString()); + tensorflow::Event meta_event; + *meta_event.mutable_tagged_run_metadata() = tagged_run_metadata; + event_writer.WriteEvent(meta_event); + *os << "Wrote HLO ops run metadata to " << event_writer.FileName() + << std::endl; + } + return Status::OK(); +} + +} // namespace + +Status WriteTensorboardTPUProfile(const string& logdir, const string& run, + const ProfileResponse& response, + std::ostream* os) { + // Dumps profile data to /plugins/profile//. + string profile_run_dir = JoinPath(logdir, kProfilePluginDirectory, run); + TF_RETURN_IF_ERROR(Env::Default()->RecursivelyCreateDir(profile_run_dir)); + // Ignore computation_graph for now. + if (response.encoded_trace().empty()) { + *os << "No trace event is collected." << std::endl; + } else { + LOG(INFO) << "Converting trace events to TraceViewer JSON."; + TF_RETURN_IF_ERROR( + DumpTraceToLogDirectory(profile_run_dir, response.encoded_trace(), os)); + } + if (response.has_op_profile() && + (response.op_profile().has_by_program_structure() || + response.op_profile().has_by_category())) { + TF_RETURN_IF_ERROR(DumpOpProfileToLogDirectory(profile_run_dir, + response.op_profile(), os)); + } + + TF_RETURN_IF_ERROR(DumpGraphEvents(logdir, run, response, os)); + + return Status::OK(); +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h new file mode 100644 index 0000000000..65b92aa418 --- /dev/null +++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.h @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TPU_PROFILER_DUMP_TPU_PROFILE_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_TPU_PROFILER_DUMP_TPU_PROFILE_H_ + +#include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace tpu { + +// Dumps all profiling tool data in a TPU profile to a TensorBoard log directory +// with the given run name. This writes user-facing log messages to `os`. +// The following tools are supported: +// - Trace viewer +// - Op profile +// - HLO computation graph +Status WriteTensorboardTPUProfile(const string& logdir, const string& run, + const ProfileResponse& response, + std::ostream* os); + +} // namespace tpu +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TPU_PROFILER_DUMP_TPU_PROFILE_H_ -- GitLab From 0f726c6b710961b540bab913e860d316591b2048 Mon Sep 17 00:00:00 2001 From: "liu.guangcong" Date: Fri, 25 Aug 2017 09:32:10 +0800 Subject: [PATCH 253/294] remove unused cpu allocator factory --- tensorflow/core/framework/allocator.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc index e7092f549b..f5dadf76da 100644 --- a/tensorflow/core/framework/allocator.cc +++ b/tensorflow/core/framework/allocator.cc @@ -117,16 +117,6 @@ class CPUAllocator : public Allocator { TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator); }; -namespace { -Allocator* MakeCpuAllocator() { - Allocator* allocator = new CPUAllocator; - if (cpu_allocator_collect_full_stats || LogMemory::IsEnabled()) { - allocator = new TrackingAllocator(allocator, true); - } - return allocator; -} -} // namespace - Allocator* cpu_allocator() { static Allocator* cpu_alloc = AllocatorRegistry::Global()->GetAllocator(); if (cpu_allocator_collect_full_stats && !cpu_alloc->TracksAllocationSizes()) { -- GitLab From 519e91ddb028bcd9d7533b2572f71e86c5e3979c Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 24 Aug 2017 23:54:51 +0000 Subject: [PATCH 254/294] Add support of int8/uint8/int16/uint16 for tf.subtract This fix adds support of int8/uint8/int16/uint16 for tf.subtract (on CPU only) This fix fixes 12571. Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_op_sub.cc | 5 ++++- tensorflow/core/ops/math_ops.cc | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc index eb173c7040..6adaecba04 100644 --- a/tensorflow/core/kernels/cwise_op_sub.cc +++ b/tensorflow/core/kernels/cwise_op_sub.cc @@ -18,7 +18,10 @@ limitations under the License. namespace tensorflow { REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32, int64, complex64, complex128); -#if defined(__ANDROID_TYPES_SLIM__) +#if !defined(__ANDROID_TYPES_SLIM__) +// Sub op for int8, uint8, int16, uint16 +REGISTER4(BinaryOp, CPU, "Sub", functor::sub, int8, uint8, int16, uint16); +#else // We only register the first type when we have multi-argument calls in the // case where we're trying to reduce executable size, but it turns out that the // int32 version of this op is needed, so explicitly include it. diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 6ff05bd2a6..6eb05874aa 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -499,7 +499,7 @@ Returns x + y element-wise. )doc"); REGISTER_OP("Sub") - .BINARY_FEWER() + .BINARY_MORE() .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( Returns x - y element-wise. -- GitLab From 5f950816981816d3fb32cbfdc1547c527d89873f Mon Sep 17 00:00:00 2001 From: Guillaume Klein Date: Thu, 24 Aug 2017 11:58:05 +0200 Subject: [PATCH 255/294] Fix deprecation warnings of *_global_step functions. --- tensorflow/contrib/layers/python/layers/optimizers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py index 7eb410b4c7..33db93b970 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers.py +++ b/tensorflow/contrib/layers/python/layers/optimizers.py @@ -156,9 +156,9 @@ def optimize_loss(loss, loss = ops.convert_to_tensor(loss) contrib_framework.assert_scalar(loss) if global_step is None: - global_step = contrib_framework.get_global_step() + global_step = train.get_global_step() else: - contrib_framework.assert_global_step(global_step) + train.assert_global_step(global_step) with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]): # Update ops take UPDATE_OPS collection if not provided. if update_ops is None: -- GitLab From 3f43cabbb1b0db790a5cd23d24ccd858e8ea631e Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Tue, 5 Sep 2017 09:17:37 -0700 Subject: [PATCH 256/294] Update head.py (#12548) Update contrib lookup to core lookup --- .../contrib/learn/python/learn/estimators/head.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index c31d5d2d47..225d879678 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -24,7 +24,6 @@ import six from tensorflow.contrib import framework as framework_lib from tensorflow.contrib import layers as layers_lib -from tensorflow.contrib import lookup as lookup_lib from tensorflow.contrib.learn.python.learn.estimators import constants from tensorflow.contrib.learn.python.learn.estimators import model_fn from tensorflow.contrib.learn.python.learn.estimators import prediction_key @@ -35,6 +34,7 @@ from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import nn @@ -1070,9 +1070,8 @@ class _MultiClassHead(_SingleHead): labels_tensor = _to_labels_tensor(labels, self._label_name) _check_no_sparse_tensor(labels_tensor) if self._label_keys: - table = lookup_lib.string_to_index_table_from_tensor( - mapping=self._label_keys, - name="label_id_lookup") + table = lookup_ops.index_table_from_tensor(self._label_keys, + name="label_id_lookup") return { "labels": labels_tensor, "label_ids": table.lookup(labels_tensor), @@ -1106,9 +1105,8 @@ class _MultiClassHead(_SingleHead): class_ids = math_ops.argmax( logits, 1, name=prediction_key.PredictionKey.CLASSES) if self._label_keys: - table = lookup_lib.index_to_string_table_from_tensor( - mapping=self._label_keys, - name="class_string_lookup") + table = lookup_ops.index_to_string_table_from_tensor( + self._label_keys, name="class_string_lookup") classes = table.lookup(class_ids) else: classes = class_ids -- GitLab From c8981445c58b3022b36941bd9f34683a116c8c9f Mon Sep 17 00:00:00 2001 From: Courtial Florian Date: Tue, 5 Sep 2017 18:22:22 +0200 Subject: [PATCH 257/294] Fix bug on the gradients graph computation caused by an Assign node - C++ API (#12397) * Fix bug on the gradients graph computation caused by an Assign node. * Adding a reachable_nodes array to the gradient computation graph. * Adding test cases when the Var has unreachable nodes. --- tensorflow/cc/framework/gradients.cc | 36 +++++++++++- tensorflow/cc/framework/gradients_test.cc | 67 +++++++++++++++++++++++ tensorflow/cc/framework/testutil.cc | 14 +++++ tensorflow/cc/framework/testutil.h | 12 ++++ 4 files changed, 127 insertions(+), 2 deletions(-) diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc index 66a943410e..1868207148 100644 --- a/tensorflow/cc/framework/gradients.cc +++ b/tensorflow/cc/framework/gradients.cc @@ -77,6 +77,10 @@ class SymbolicGradientBuilder { Status CallGradFunction(const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs); + + // Returns a list mapping whether each node in the graph is reachable + // from outputs_. Keyed by node id. + std::vector GetReachableNodes(); const Scope& scope_; const ops::GradOpRegistry* registry_; @@ -143,11 +147,36 @@ Status SymbolicGradientBuilder::BackpropAlongEdge(const Output& dst_grad, return Status::OK(); } +std::vector SymbolicGradientBuilder::GetReachableNodes() { + std::vector reachable_nodes(scope_.graph()->num_node_ids(), false); + std::deque queue; + for (const Output& out : outputs_) { + if (!reachable_nodes[out.node()->id()]) { + queue.push_back(out.node()); + reachable_nodes[out.node()->id()] = true; + } + } + + while (!queue.empty()) { + Node* n = queue.front(); + queue.pop_front(); + for (const Edge* e : n->in_edges()) { + if (e->IsControlEdge()) continue; + queue.push_back(e->src()); + reachable_nodes[e->src()->id()] = true; + } + } + return reachable_nodes; +} + Status SymbolicGradientBuilder::Initialize() { if (outputs_.size() != grad_inputs_.size()) { return errors::InvalidArgument( "Must specify a gradient input for each output."); } + std::vector reachable_nodes = GetReachableNodes(); + // TODO(theflofly) Check that inputs_ are reachable from + // outputs_ using reachable_nodes grad_outputs_->clear(); grad_outputs_->resize(inputs_.size()); // Populate `output_nodes_` from node ids in `outputs_`. @@ -188,12 +217,15 @@ Status SymbolicGradientBuilder::Initialize() { if (output_nodes_.find(n->id()) == output_nodes_.end()) { // Internal node: continue BFS along connected outputs. for (const Edge* e : n->out_edges()) { - if (e->IsControlEdge()) continue; - ++num_expected_backprops; + // If a node is not reachable from outputs_, + // we don't expect it to receive a backpropagated gradient. + // It will not be counted in num_expected_backprops. + if (e->IsControlEdge() || !reachable_nodes[e->dst()->id()]) continue; if (visited.find(e->dst()) == visited.end()) { queue.push_back(e->dst()); visited.insert(e->dst()); } + ++num_expected_backprops; } } else { // Output node: stop BFS and update `num_expected_backprops` for diff --git a/tensorflow/cc/framework/gradients_test.cc b/tensorflow/cc/framework/gradients_test.cc index 24af7d567b..032ab93623 100644 --- a/tensorflow/cc/framework/gradients_test.cc +++ b/tensorflow/cc/framework/gradients_test.cc @@ -364,6 +364,73 @@ TEST_F(GradientsTest, MultipleNodeOutputGrads) { test::AsTensor({60, 61, 62, 63, 66, 66, 66, 67}, {4, 2})); } +TEST_F(GradientsTest, UnreachableEdgeGradOneOutput) { + auto x = Variable(scope_test_, {2, 3}, DT_DOUBLE); + auto x_const = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}); + auto x_assign = Assign(scope_test_, x, x_const); + + auto y = Variable(scope_test_, {3, 1}, DT_DOUBLE); + auto y_const = Const(scope_test_, {{1.0}, {2.0}, {3.0}}); + auto y_assign = Assign(scope_test_, y, y_const); + + auto m1 = MatMul(scope_test_, x, y); + + auto z = Variable(scope_test_, {1, 3}, DT_DOUBLE); + auto z_const = Const(scope_test_, {{9.0, 10.0, 11.0}}); + auto z_assign = Assign(scope_test_, z, z_const); + + auto m2 = MatMul(scope_test_, y, z); + + auto dm1 = Const(scope_test_, {{0.5}, {0.5}}); + + std::vector grad_outputs; + TF_ASSERT_OK( + AddSymbolicGradients(scope_test_, {m1}, {y}, {dm1}, &grad_outputs)); + + std::vector outputs; + test::GetTensors(scope_test_, {x_assign, y_assign, z_assign}, + {grad_outputs[0]}, &outputs); + // dz/dy = xT * dm1 + test::ExpectTensorNear( + outputs[0], test::AsTensor({2.5, 3.5, 4.5}, {3, 1}), 1e-5); +} + +TEST_F(GradientsTest, UnreachableEdgeGradTwoOutputs) { + auto x = Variable(scope_test_, {2, 3}, DT_DOUBLE); + auto x_const = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}); + auto x_assign = Assign(scope_test_, x, x_const); + + auto y = Variable(scope_test_, {3, 1}, DT_DOUBLE); + auto y_const = Const(scope_test_, {{1.0}, {2.0}, {3.0}}); + auto y_assign = Assign(scope_test_, y, y_const); + + auto m1 = MatMul(scope_test_, x, y); + + auto z = Variable(scope_test_, {1, 3}, DT_DOUBLE); + auto z_const = Const(scope_test_, {{9.0, 10.0, 11.0}}); + auto z_assign = Assign(scope_test_, z, z_const); + + auto m2 = MatMul(scope_test_, y, z); + + auto dm1 = Const(scope_test_, {{0.5}, {0.5}}); + auto dm2 = + Const(scope_test_, {{0.5, 0.5, 0.5}, {0.6, 0.7, 0.8}, {0.6, 0.7, 0.9}}); + + std::vector grad_outputs; + TF_ASSERT_OK(AddSymbolicGradients(scope_test_, {m1, m2}, {y}, {dm1, dm2}, + &grad_outputs)); + + std::vector outputs; + test::GetTensors(scope_test_, {x_assign, y_assign, z_assign}, + {grad_outputs[0]}, &outputs); + + // the gradients from m1 and m2 will be summed to compute the gradient + // w.r.t y + // dz/dy = xT * dm1 + dm2 * zT + test::ExpectTensorNear( + outputs[0], test::AsTensor({17.5, 24.7, 26.8}, {3, 1}), 1e-5); +} + // StopGradientSingleOutputMultiEdgeTest tests combinations of valid and // 'NoGradient' (induced by StopGradient op) returned along multiple edges from // a single nodes output. diff --git a/tensorflow/cc/framework/testutil.cc b/tensorflow/cc/framework/testutil.cc index ca78f31db5..25ee08f676 100644 --- a/tensorflow/cc/framework/testutil.cc +++ b/tensorflow/cc/framework/testutil.cc @@ -36,5 +36,19 @@ void GetTensor(const Scope& scope, Output tensor, Tensor* out) { *out = outputs[0]; } +void GetTensors(const Scope& scope, const std::vector& assign_vars, + OutputList tensors, std::vector* out) { + ClientSession session(scope); + TF_CHECK_OK(session.Run(assign_vars, nullptr)); + TF_CHECK_OK(session.Run(tensors, out)); +} + +void GetTensor(const Scope& scope, const std::vector& assign_vars, + Output tensor, Tensor* out) { + std::vector outputs; + GetTensors(scope, assign_vars, {std::move(tensor)}, &outputs); + *out = outputs[0]; +} + } // end namespace test } // end namespace tensorflow diff --git a/tensorflow/cc/framework/testutil.h b/tensorflow/cc/framework/testutil.h index d027ad3744..ca57c0f0a4 100644 --- a/tensorflow/cc/framework/testutil.h +++ b/tensorflow/cc/framework/testutil.h @@ -26,9 +26,21 @@ namespace test { void GetTensors(const Scope& scope, OutputList tensors, std::vector* out); +// Computes the outputs listed in 'tensors', returns the tensors in 'out'. +// assign_vars are extra outputs that should be run +// e.g. to assign values to variables. +void GetTensors(const Scope& scope, const std::vector& assign_vars, + OutputList tensors, std::vector* out); + /// Computes the output 'tensor', returning the resulting tensor in 'out'. void GetTensor(const Scope& scope, Output tensor, Tensor* out); +// Computes the output 'tensor', returning the resulting tensor in 'out'. +// assign_vars are extra outputs that should be run +// e.g. to assign values to variables. +void GetTensor(const Scope& scope, const std::vector& assign_vars, + Output tensor, Tensor* out); + } // namespace test } // namespace tensorflow -- GitLab From 2306697c8a9ee747ad828f099ffe67ba0fe6b072 Mon Sep 17 00:00:00 2001 From: Lin Min Date: Tue, 5 Sep 2017 11:28:42 -0500 Subject: [PATCH 258/294] Add SerializeTensor operator (#11992) * Add SerializeTensor operator * Remove empty constructor for SerializeTensorOp * Add test for SerializeTensorOp * Explicitly test each dtype for SerializeTensorOp * Fix BUILD file format * update goldens --- tensorflow/core/kernels/BUILD | 14 ++ tensorflow/core/kernels/parse_tensor_op.cc | 29 +++ tensorflow/core/kernels/parse_tensor_test.cc | 213 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 19 ++ tensorflow/core/ops/parsing_ops.cc | 13 ++ tensorflow/python/ops/io_ops.py | 1 + tensorflow/python/ops/parsing_ops.py | 1 + tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + 8 files changed, 294 insertions(+) create mode 100644 tensorflow/core/kernels/parse_tensor_test.cc diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 174ccde8b7..b51fc841d1 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -3343,6 +3343,20 @@ tf_kernel_library( deps = PARSING_DEPS, ) +tf_cc_test( + name = "parse_tensor_test", + srcs = ["parse_tensor_test.cc"], + deps = [ + ":ops_testutil", + ":ops_util", + ":parse_tensor_op", + "//tensorflow/core:framework", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_kernel_library( name = "string_to_number_op", prefix = "string_to_number_op", diff --git a/tensorflow/core/kernels/parse_tensor_op.cc b/tensorflow/core/kernels/parse_tensor_op.cc index 79199ff5c3..dd645262d2 100644 --- a/tensorflow/core/kernels/parse_tensor_op.cc +++ b/tensorflow/core/kernels/parse_tensor_op.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/framework/register_types.h" namespace tensorflow { @@ -65,4 +66,32 @@ class ParseTensorOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("ParseTensor").Device(DEVICE_CPU), ParseTensorOp); + +template +class SerializeTensorOp : public OpKernel { + public: + using OpKernel::OpKernel; + + void Compute(OpKernelContext* context) override { + const Tensor& tensor = context->input(0); + TensorProto proto; + if (tensor.dtype() == DT_STRING) { + tensor.AsProtoField(&proto); + } else { + tensor.AsProtoTensorContent(&proto); + } + Tensor* proto_string = nullptr; + OP_REQUIRES_OK( + context, context->allocate_output(0, TensorShape({}), &proto_string)); + CHECK(proto.SerializeToString(&proto_string->scalar()())); + } +}; + +#define REGISTER(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("SerializeTensor").Device(DEVICE_CPU).TypeConstraint("T"), \ + SerializeTensorOp); +TF_CALL_ALL_TYPES(REGISTER) +#undef REGISTER + } // namespace tensorflow diff --git a/tensorflow/core/kernels/parse_tensor_test.cc b/tensorflow/core/kernels/parse_tensor_test.cc new file mode 100644 index 0000000000..f6f60fee71 --- /dev/null +++ b/tensorflow/core/kernels/parse_tensor_test.cc @@ -0,0 +1,213 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/ops_testutil.h" + +namespace tensorflow { +namespace { + +class SerializeTensorOpTest : public OpsTestBase { + protected: + template + void MakeOp(const TensorShape& input_shape, + std::function functor) { + TF_ASSERT_OK( + NodeDefBuilder("myop", "SerializeTensor") + .Input(FakeInput(DataTypeToEnum::value)) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + AddInput(input_shape, functor); + } + void ParseSerializedWithNodeDef(const NodeDef& parse_node_def, + Tensor* serialized, + Tensor* parse_output) { + std::unique_ptr device( + DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0")); + gtl::InlinedVector inputs; + inputs.push_back({nullptr, serialized}); + Status status; + std::unique_ptr op( + CreateOpKernel(DEVICE_CPU, device.get(), + cpu_allocator(), parse_node_def, + TF_GRAPH_DEF_VERSION, &status)); + TF_EXPECT_OK(status); + OpKernelContext::Params params; + params.device = device.get(); + params.inputs = &inputs; + params.frame_iter = FrameAndIter(0, 0); + params.op_kernel = op.get(); + std::vector attrs; + test::SetOutputAttrs(¶ms, &attrs); + OpKernelContext ctx(¶ms); + op->Compute(&ctx); + TF_EXPECT_OK(status); + *parse_output = *ctx.mutable_output(0); + } + template + void ParseSerializedOutput(Tensor* serialized, Tensor* parse_output) { + NodeDef parse; + TF_ASSERT_OK(NodeDefBuilder("parse", "ParseTensor") + .Input(FakeInput(DT_STRING)) + .Attr("out_type", DataTypeToEnum::value) + .Finalize(&parse)); + ParseSerializedWithNodeDef(parse, serialized, parse_output); + } +}; + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_half) { + MakeOp(TensorShape({10}), [](int x) -> Eigen::half { + return static_cast(x / 10.); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_float) { + MakeOp(TensorShape({1, 10}), [](int x) -> float { + return static_cast(x / 10.); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_double) { + MakeOp(TensorShape({5, 5}), [](int x) -> double { + return static_cast(x / 10.); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int64) { + MakeOp(TensorShape({2, 3, 4}), [](int x) -> int64 { + return static_cast(x - 10); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int32) { + MakeOp(TensorShape({4, 2}), [](int x) -> int32 { + return static_cast(x + 7); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int16) { + MakeOp(TensorShape({8}), [](int x) -> int16 { + return static_cast(x + 18); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int8) { + MakeOp(TensorShape({2}), [](int x) -> int8 { + return static_cast(x + 8); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint16) { + MakeOp(TensorShape({1, 3}), [](int x) -> uint16 { + return static_cast(x + 2); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint8) { + MakeOp(TensorShape({2, 1, 1}), [](int x) -> uint8 { + return static_cast(x + 1); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_complex64) { + MakeOp(TensorShape({}), [](int x) -> complex64 { + return complex64{ static_cast(x / 8.), + static_cast(x / 2.) }; + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_complex128) { + MakeOp(TensorShape({3}), [](int x) -> complex128 { + return complex128{ x / 3., x / 2. }; + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_bool) { + MakeOp(TensorShape({1}), [](int x) -> bool { + return static_cast(x % 2); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_string) { + MakeOp(TensorShape({10}), [](int x) -> std::string { + return std::to_string(x / 10.); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput(GetOutput(0), &parse_output); + test::ExpectTensorEqual(parse_output, GetInput(0)); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 3a28ce3767..35c31c6cb8 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -15804,6 +15804,25 @@ op { } summary: "Transforms a serialized tensorflow.TensorProto proto into a Tensor." } +op { + name: "SerializeTensor" + input_arg { + name: "tensor" + description: "A Tensor of type `T`." + type: "T" + } + output_arg { + name: "serialized" + description: "A serialized TensorProto proto of the input tensor." + type_attr: DT_STRING + } + attr { + name: "T" + type: "type" + description: "The type of the input tensor." + } + summary: "Transforms a Tensor into a serialized TensorProto proto." +} op { name: "Placeholder" output_arg { diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc index 2e605fdffc..1f7ebe91cf 100644 --- a/tensorflow/core/ops/parsing_ops.cc +++ b/tensorflow/core/ops/parsing_ops.cc @@ -292,6 +292,19 @@ out_type: The type of the serialized tensor. The provided type must match the output: A Tensor of type `out_type`. )doc"); +REGISTER_OP("SerializeTensor") + .Input("tensor: T") + .Output("serialized: string") + .Attr("T: type") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Transforms a Tensor into a serialized TensorProto proto. + +tensor: A Tensor of type `T`. +T: The type of the input tensor. +serialized: A serialized TensorProto proto of the input tensor. +)doc"); + REGISTER_OP("DecodeJSONExample") .Input("json_examples: string") .Output("binary_examples: string") diff --git a/tensorflow/python/ops/io_ops.py b/tensorflow/python/ops/io_ops.py index 5cd5d7ba2f..bd879ac423 100644 --- a/tensorflow/python/ops/io_ops.py +++ b/tensorflow/python/ops/io_ops.py @@ -37,6 +37,7 @@ See the @{$python/io_ops} guide. @@parse_example @@parse_single_example @@parse_tensor +@@serialize_tensor @@decode_json_example @@QueueBase @@FIFOQueue diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index e0e3d08e7c..bf7c9fac8e 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -40,6 +40,7 @@ from tensorflow.python.platform import tf_logging ops.NotDifferentiable("DecodeRaw") ops.NotDifferentiable("ParseTensor") +ops.NotDifferentiable("SerializeTensor") ops.NotDifferentiable("StringToNumber") diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 667ae5cf6e..7ad00281a1 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1684,6 +1684,10 @@ tf_module { name: "serialize_sparse" argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "serialize_tensor" + argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "set_random_seed" argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=None" -- GitLab From 674db817310264ec1de49fff20971741e1dc6e36 Mon Sep 17 00:00:00 2001 From: Fred Reiss Date: Tue, 5 Sep 2017 09:34:23 -0700 Subject: [PATCH 259/294] Add name of C++ source file to generated Python files for ops. (#11663) * Embed orig source file name in generated Python * Change pass-by-value --> by-ref to address review comment. * Reformatting source code with clang-format. * Change to different test for empty string per review comment. * Moved code from python_op_gen to python_eager_op_gen. --- .../python/eager/python_eager_op_gen.cc | 25 ++++++++--- tensorflow/python/eager/python_eager_op_gen.h | 5 ++- .../python/framework/python_op_gen_main.cc | 43 ++++++++++++++++--- 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index c46a3d8db3..62579bd23a 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -659,14 +659,26 @@ void GenEagerPythonOp::AddEagerExecute(const string& num_outputs_expr) { string GetEagerPythonOps(const OpList& ops, const std::vector& hidden_ops, - bool require_shapes) { + bool require_shapes, + const string& source_file_name = "") { + string result; // Header // TODO(josh11b): Mention the library for which wrappers are being generated. - strings::StrAppend(&result, R"("""Python wrappers for TensorFlow ops. + strings::StrAppend(&result, R"("""Python wrappers around TensorFlow ops. This file is MACHINE GENERATED! Do not edit. -""" +)"); + + // Mention the original source file so someone tracing back through generated + // Python code will know where to look next. + if (!source_file_name.empty()) { + strings::StrAppend(&result, "Original C++ source file: "); + strings::StrAppend(&result, source_file_name); + strings::StrAppend(&result, "\n"); + } + + strings::StrAppend(&result, R"(""" import collections as _collections @@ -747,8 +759,11 @@ from tensorflow.python.framework import op_def_library as _op_def_library void PrintEagerPythonOps(const OpList& ops, const std::vector& hidden_ops, - bool require_shapes) { - printf("%s", GetEagerPythonOps(ops, hidden_ops, require_shapes).c_str()); + bool require_shapes, + const string& source_file_name) +{ + printf("%s", GetEagerPythonOps(ops, hidden_ops, require_shapes, + source_file_name).c_str()); } string GetEagerPythonWrappers(const char* op_list_buf, size_t op_list_len) { diff --git a/tensorflow/python/eager/python_eager_op_gen.h b/tensorflow/python/eager/python_eager_op_gen.h index 9a7ed28cf9..250623850f 100644 --- a/tensorflow/python/eager/python_eager_op_gen.h +++ b/tensorflow/python/eager/python_eager_op_gen.h @@ -24,9 +24,12 @@ namespace tensorflow { // hidden_ops should be a list of Op names that should get a leading _ // in the output. Prints the output to stdout. +// Optional fourth argument is the name of the original C++ source file +// where the ops' REGISTER_OP() calls reside. void PrintEagerPythonOps(const OpList& ops, const std::vector& hidden_ops, - bool require_shapes); + bool require_shapes, + const string& source_file_name = ""); // Get the python wrappers for a list of ops in a OpList. // `op_list_buf` should be a pointer to a buffer containing diff --git a/tensorflow/python/framework/python_op_gen_main.cc b/tensorflow/python/framework/python_op_gen_main.cc index 8366542288..3cf56330e0 100644 --- a/tensorflow/python/framework/python_op_gen_main.cc +++ b/tensorflow/python/framework/python_op_gen_main.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/op_def.pb.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/io/inputbuffer.h" +#include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" @@ -80,7 +81,31 @@ Status ParseOpListCommandLine(const char* arg, std::vector* op_list) { return Status::OK(); } -void PrintAllPythonOps(const std::vector& op_list, bool require_shapes, + +// Use the name of the current executable to infer the C++ source file +// where the REGISTER_OP() call for the operator can be found. +// Returns the name of the file. +// Returns an empty string if the current executable's name does not +// follow a known pattern. +string InferSourceFileName(const char* argv_zero) { + StringPiece command_str = io::Basename(argv_zero); + + // For built-in ops, the Bazel build creates a separate executable + // with the name gen__ops_py_wrappers_cc containing the + // operators defined in _ops.cc + const char* kExecPrefix = "gen_"; + const char* kExecSuffix = "_py_wrappers_cc"; + if (command_str.Consume(kExecPrefix) && command_str.ends_with(kExecSuffix)) { + command_str.remove_suffix(strlen(kExecSuffix)); + return strings::StrCat(command_str, ".cc"); + } else { + return string(""); + } +} + +void PrintAllPythonOps(const std::vector& op_list, + const string& source_file_name, + bool require_shapes, bool op_list_is_whitelist) { OpList ops; OpRegistry::Global()->Export(false, &ops); @@ -93,9 +118,9 @@ void PrintAllPythonOps(const std::vector& op_list, bool require_shapes, *pruned_ops.mutable_op()->Add() = op_def; } } - PrintEagerPythonOps(pruned_ops, {}, require_shapes); + PrintEagerPythonOps(pruned_ops, {}, require_shapes, source_file_name); } else { - PrintEagerPythonOps(ops, op_list, require_shapes); + PrintEagerPythonOps(ops, op_list, require_shapes, source_file_name); } } @@ -105,20 +130,26 @@ void PrintAllPythonOps(const std::vector& op_list, bool require_shapes, int main(int argc, char* argv[]) { tensorflow::port::InitMain(argv[0], &argc, &argv); + tensorflow::string source_file_name = + tensorflow::InferSourceFileName(argv[0]); + // Usage: // gen_main [ @FILENAME | OpName[,OpName]* ] (0 | 1) [0 | 1] if (argc == 2) { - tensorflow::PrintAllPythonOps({}, {}, tensorflow::string(argv[1]) == "1"); + tensorflow::PrintAllPythonOps({}, source_file_name, + tensorflow::string(argv[1]) == "1", + false /* op_list_is_whitelist */); } else if (argc == 3) { std::vector hidden_ops; TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[1], &hidden_ops)); - tensorflow::PrintAllPythonOps(hidden_ops, + tensorflow::PrintAllPythonOps(hidden_ops, source_file_name, tensorflow::string(argv[2]) == "1", false /* op_list_is_whitelist */); } else if (argc == 4) { std::vector op_list; TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[1], &op_list)); - tensorflow::PrintAllPythonOps(op_list, tensorflow::string(argv[2]) == "1", + tensorflow::PrintAllPythonOps(op_list, source_file_name, + tensorflow::string(argv[2]) == "1", tensorflow::string(argv[3]) == "1"); } else { return -1; -- GitLab From b5214cab6151fc9c0471829a05bab4872e2e3bc4 Mon Sep 17 00:00:00 2001 From: Tian Jin Date: Tue, 5 Sep 2017 12:36:07 -0400 Subject: [PATCH 260/294] Add GPU implementation for tf.segment_sum. (#11630) * Add GPU implementation for tf.segment_sum. * Refactor segment sum to compute asynchronously. * Add GPU tests and change test datatype to accommodate GPU kernel input requirement. * Add benchmarks. * Benchmark results against baseline unsorted impl. The columns are: datatypes, outer dimension, output outer dimension, inner dimension, execution time difference against baseline impl as a percentage of the time taken by the baseline impl (positive values mean faster execution than baseline). fp32 512 256 512 -11.8632 fp64 512 256 512 -10.1854 fp32 512 256 2048 -6.2147 fp64 512 256 2048 -0.106 fp32 512 256 8192 -0.0867 fp64 512 256 8192 3.6285 fp32 512 256 1120 -10.9163 fp64 512 256 1120 -1.3509 fp32 512 256 1215 -2.0428 fp64 512 256 1215 -7.884 fp32 512 256 1856 -7.3159 fp64 512 256 1856 -0.1011 fp32 512 256 1302 -3.7802 fp64 512 256 1302 4.6675 fp32 512 256 1329 -13.2275 fp64 512 256 1329 4.3505 fp32 512 256 1531 -7.5993 fp64 512 256 1531 -3.5371 fp32 512 256 1313 -5.0677 fp64 512 256 1313 -1.368 fp32 512 256 1672 -0.0907 fp64 512 256 1672 4.5809 fp32 512 256 1851 -10.2862 fp64 512 256 1851 2.3119 fp32 512 256 1584 -10.3406 fp64 512 256 1584 1.4481 fp32 512 64 512 -6.3544 fp64 512 64 512 -18.4343 fp32 512 64 2048 -9.6639 fp64 512 64 2048 0.0714 fp32 512 64 8192 1.2097 fp64 512 64 8192 10.4839 fp32 512 64 1120 -20.1102 fp64 512 64 1120 -5.0784 fp32 512 64 1215 -6.4061 fp64 512 64 1215 -14.1781 fp32 512 64 1856 2.4221 fp64 512 64 1856 -8.4205 fp32 512 64 1302 -10.2403 fp64 512 64 1302 -7.0577 fp32 512 64 1329 -10.515 fp64 512 64 1329 -4.4899 fp32 512 64 1531 -18.4045 fp64 512 64 1531 9.5982 fp32 512 64 1313 -17.0858 fp64 512 64 1313 -2.02 fp32 512 64 1672 -6.1997 fp64 512 64 1672 1.1616 fp32 512 64 1851 -2.3241 fp64 512 64 1851 -1.0585 fp32 512 64 1584 -7.2199 fp64 512 64 1584 -1.0865 fp32 512 16 512 -14.8754 fp64 512 16 512 -10.987 fp32 512 16 2048 -18.3725 fp64 512 16 2048 1.5949 fp32 512 16 8192 2.163 fp64 512 16 8192 12.5431 fp32 512 16 1120 2.2558 fp64 512 16 1120 -2.7135 fp32 512 16 1215 -12.7228 fp64 512 16 1215 11.0343 fp32 512 16 1856 -6.986 fp64 512 16 1856 7.0687 fp32 512 16 1302 -9.3881 fp64 512 16 1302 -8.2974 fp32 512 16 1329 -11.5103 fp64 512 16 1329 18.9707 fp32 512 16 1531 -14.3721 fp64 512 16 1531 9.6774 fp32 512 16 1313 -16.5546 fp64 512 16 1313 11.7528 fp32 512 16 1672 -10.3689 fp64 512 16 1672 15.1197 fp32 512 16 1851 -11.6021 fp64 512 16 1851 8.2983 fp32 512 16 1584 -13.6702 fp64 512 16 1584 9.4635 fp32 2048 1024 512 -5.6482 fp64 2048 1024 512 1.45 fp32 2048 1024 2048 -0.066 fp64 2048 1024 2048 3.6549 fp32 2048 1024 8192 4.3953 fp64 2048 1024 8192 5.0636 fp32 2048 1024 1120 2.3119 fp64 2048 1024 1120 3.4102 fp32 2048 1024 1215 1.6251 fp64 2048 1024 1215 2.4538 fp32 2048 1024 1856 1.4219 fp64 2048 1024 1856 5.2966 fp32 2048 1024 1302 0.4938 fp64 2048 1024 1302 3.6871 fp32 2048 1024 1329 2.7753 fp64 2048 1024 1329 4.2955 fp32 2048 1024 1531 1.8766 fp64 2048 1024 1531 4.4579 fp32 2048 1024 1313 0.6639 fp64 2048 1024 1313 4.5556 fp32 2048 1024 1672 1.1072 fp64 2048 1024 1672 3.8653 fp32 2048 1024 1851 1.1566 fp64 2048 1024 1851 3.6434 fp32 2048 1024 1584 0.7806 fp64 2048 1024 1584 4.3265 fp32 2048 256 512 -10.7236 fp64 2048 256 512 1.011 fp32 2048 256 2048 2.2321 fp64 2048 256 2048 12.2771 fp32 2048 256 8192 8.0287 fp64 2048 256 8192 15.4497 fp32 2048 256 1120 -8.1388 fp64 2048 256 1120 5.8003 fp32 2048 256 1215 1.709 fp64 2048 256 1215 12.4369 fp32 2048 256 1856 5.1844 fp64 2048 256 1856 14.2236 fp32 2048 256 1302 2.8457 fp64 2048 256 1302 10.5728 fp32 2048 256 1329 -2.547 fp64 2048 256 1329 12.1123 fp32 2048 256 1531 2.4946 fp64 2048 256 1531 12.2398 fp32 2048 256 1313 6.1621 fp64 2048 256 1313 9.857 fp32 2048 256 1672 2.176 fp64 2048 256 1672 9.8899 fp32 2048 256 1851 4.6307 fp64 2048 256 1851 15.0223 fp32 2048 256 1584 3.5238 fp64 2048 256 1584 10.3181 fp32 2048 64 512 -11.5325 fp64 2048 64 512 8.5141 fp32 2048 64 2048 0.6066 fp64 2048 64 2048 25.8166 fp32 2048 64 8192 15.5994 fp64 2048 64 8192 29.453 fp32 2048 64 1120 1.5933 fp64 2048 64 1120 17.1686 fp32 2048 64 1215 -11.8064 fp64 2048 64 1215 21.7897 fp32 2048 64 1856 3.3061 fp64 2048 64 1856 17.6379 fp32 2048 64 1302 -1.201 fp64 2048 64 1302 26.775 fp32 2048 64 1329 -1.377 fp64 2048 64 1329 23.6142 fp32 2048 64 1531 0.9212 fp64 2048 64 1531 16.7177 fp32 2048 64 1313 2.8448 fp64 2048 64 1313 26.824 fp32 2048 64 1672 1.5334 fp64 2048 64 1672 23.7874 fp32 2048 64 1851 0.1934 fp64 2048 64 1851 25.1446 fp32 2048 64 1584 -2.8748 fp64 2048 64 1584 22.3902 fp32 8192 4096 512 0.0512 fp64 8192 4096 512 2.8049 fp32 8192 4096 2048 3.6683 fp64 8192 4096 2048 5.7372 fp32 8192 4096 8192 6.2501 fp64 8192 4096 8192 5.6644 fp32 8192 4096 1120 3.4347 fp64 8192 4096 1120 5.9099 fp32 8192 4096 1215 4.0591 fp64 8192 4096 1215 6.2049 fp32 8192 4096 1856 4.5046 fp64 8192 4096 1856 5.9 fp32 8192 4096 1302 3.8744 fp64 8192 4096 1302 5.74 fp32 8192 4096 1329 3.9169 fp64 8192 4096 1329 6.302 fp32 8192 4096 1531 5.0479 fp64 8192 4096 1531 6.048 fp32 8192 4096 1313 3.5261 fp64 8192 4096 1313 6.0544 fp32 8192 4096 1672 4.6081 fp64 8192 4096 1672 5.2568 fp32 8192 4096 1851 4.2022 fp64 8192 4096 1851 6.0934 fp32 8192 4096 1584 3.3852 fp64 8192 4096 1584 5.6772 fp32 8192 1024 512 3.7405 fp64 8192 1024 512 16.4627 fp32 8192 1024 2048 8.3918 fp64 8192 1024 2048 18.5254 fp32 8192 1024 8192 13.7773 fp64 8192 1024 8192 17.4314 fp32 8192 1024 1120 6.2023 fp64 8192 1024 1120 16.689 fp32 8192 1024 1215 9.5441 fp64 8192 1024 1215 19.7246 fp32 8192 1024 1856 9.864 fp64 8192 1024 1856 18.2895 fp32 8192 1024 1302 7.3145 fp64 8192 1024 1302 19.8528 fp32 8192 1024 1329 9.6131 fp64 8192 1024 1329 19.5526 fp32 8192 1024 1531 8.9847 fp64 8192 1024 1531 20.3696 fp32 8192 1024 1313 7.2819 fp64 8192 1024 1313 20.5361 fp32 8192 1024 1672 11.8095 fp64 8192 1024 1672 18.3047 fp32 8192 1024 1851 12.1042 fp64 8192 1024 1851 21.8124 fp32 8192 1024 1584 9.6549 fp64 8192 1024 1584 18.1818 fp32 8192 256 512 8.2649 fp64 8192 256 512 20.9372 fp32 8192 256 2048 15.6297 fp64 8192 256 2048 35.6407 fp32 8192 256 8192 21.7055 fp64 8192 256 8192 37.225 fp32 8192 256 1120 8.322 fp64 8192 256 1120 33.6497 fp32 8192 256 1215 12.9148 fp64 8192 256 1215 40.0554 fp32 8192 256 1856 12.2226 fp64 8192 256 1856 36.2642 fp32 8192 256 1302 12.2956 fp64 8192 256 1302 40.4711 fp32 8192 256 1329 10.2045 fp64 8192 256 1329 38.4891 fp32 8192 256 1531 14.9187 fp64 8192 256 1531 40.7874 fp32 8192 256 1313 9.5106 fp64 8192 256 1313 42.1367 fp32 8192 256 1672 15.2577 fp64 8192 256 1672 36.7527 fp32 8192 256 1851 15.668 fp64 8192 256 1851 40.2035 fp32 8192 256 1584 14.126 fp64 8192 256 1584 32.7602 --- tensorflow/core/kernels/BUILD | 4 +- .../core/kernels/segment_reduction_ops.cc | 128 +++++++++++++ .../core/kernels/segment_reduction_ops.h | 22 +++ .../kernels/segment_reduction_ops_gpu.cu.cc | 121 +++++++++++++ tensorflow/python/kernel_tests/BUILD | 4 +- .../segment_reduction_ops_test.py | 171 ++++++++++++------ 6 files changed, 397 insertions(+), 53 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index b51fc841d1..efc5d7c553 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2589,7 +2589,9 @@ tf_kernel_library( tf_kernel_library( name = "segment_reduction_ops", prefix = "segment_reduction_ops", - deps = MATH_DEPS, + deps = MATH_DEPS + if_cuda([ + ":cuda_solvers", + ]), ) tf_kernel_library( diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc index 9cdbe89457..8f7eff113c 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.cc +++ b/tensorflow/core/kernels/segment_reduction_ops.cc @@ -16,6 +16,9 @@ limitations under the License. // See docs in ../ops/math_ops.cc. #define EIGEN_USE_THREADS +#if GOOGLE_CUDA +#define EIGEN_USE_GPU +#endif // GOOGLE_CUDA #include "tensorflow/core/kernels/segment_reduction_ops.h" #include @@ -32,6 +35,15 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/util.h" + +#if GOOGLE_CUDA +#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/platform/cuda.h" + +using ::perftools::gputools::cuda::ScopedActivateExecutorContext; +#endif // GOOGLE_CUDA + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -183,6 +195,105 @@ class SegmentReductionOp : public OpKernel { } }; +#ifdef GOOGLE_CUDA +// SegmentSumGPUOp is a segment sum operator implemented for GPU only. +// TODO: This implementation of SegmentSumGPUOp is sometimes slower than +// its unsorted counterpart (mostly when problem size is small). +// This is due to the following two main reasons and a cost-effective way +// to resolve these problems is desirable. +// 1. Sorted segment sum requires a memory transfer from device to host in +// order to know the size of the output dimension whereas unsorted segment +// sum receives the size of the output dimension as an input parameter. +// 2. Sorted segment sum is essentially a tiled version of unsorted segment +// sum and therefore such optimization comes at an inherent cost. However +// such cost may not be justified when the problem size is small. When to +// use the tiled version or the untiled version depends on many factors +// including data alignments, ratio of calculation to memory traffic and +// obviously, the problem sizes. +template +class SegmentSumGPUOp : public AsyncOpKernel { + public: + explicit SegmentSumGPUOp(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + + void ComputeAsync(OpKernelContext* context, DoneCallback done) override { + const Tensor& input = context->input(0); + const Tensor& segment_ids = context->input(1); + + OP_REQUIRES_ASYNC( + context, TensorShapeUtils::IsVector(segment_ids.shape()), + errors::InvalidArgument("segment_ids should be a vector."), done); + + const int64 num_indices = segment_ids.NumElements(); + OP_REQUIRES_ASYNC( + context, num_indices == input.dim_size(0), + errors::InvalidArgument( + "segment_ids should be the same size as dimension 0 of" + " input."), + done); + + if (num_indices == 0) { + TensorShape output_shape = input.shape(); + output_shape.set_dim(0, 0); + + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + context, context->allocate_output(0, output_shape, &output), done); + done(); + return; + } + + perftools::gputools::DeviceMemoryBase output_rows_device( + (void*)(segment_ids.template flat().data() + (num_indices - 1))); + ScratchSpace output_rows_host(context, 1, /* on_host */ true); + + auto stream = context->op_device_context()->stream(); + OP_REQUIRES_ASYNC( + context, stream + ->ThenMemcpy(output_rows_host.mutable_data(), + output_rows_device, sizeof(Index)) + .ok(), + errors::Internal( + "SegmentSumGPUOp: failed to copy output_rows from device"), + done); + + functor::SegmentSumFunctor functor_; + auto create_and_check_output = [context, output_rows_host, &input, + &segment_ids, &functor_, done]() { + // Ensure that within the callback, the proper GPU settings are + // configured. + auto stream = context->op_device_context()->stream(); + ScopedActivateExecutorContext scoped_activation{stream->parent()}; + + Index output_rows = *output_rows_host.data(); + output_rows++; + OP_REQUIRES_ASYNC(context, output_rows > 0, + errors::InvalidArgument("segment ids must be >= 0"), + done); + + TensorShape output_shape = input.shape(); + output_shape.set_dim(0, output_rows); + + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + context, context->allocate_output(0, output_shape, &output), done); + + auto output_flat = output->flat_outer_dims(); + auto data_ptr = input.template flat().data(); + auto segment_flat = segment_ids.flat(); + functor_(context, context->eigen_device(), output_rows, + segment_ids.shape(), segment_flat, input.NumElements(), data_ptr, + output_flat); + + done(); + }; + + context->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute( + stream, create_and_check_output); + } +}; +#endif // GOOGLE_CUDA + #define REGISTER_CPU_KERNEL_SEGMENT(name, functor, type, index_type, \ default_value) \ REGISTER_KERNEL_BUILDER( \ @@ -227,6 +338,23 @@ REGISTER_COMPLEX_CPU_KERNELS_ALL(complex128); #undef REGISTER_REAL_CPU_KERNELS_ALL #undef REGISTER_COMPLEX_CPU_KERNELS_ALL +#if GOOGLE_CUDA +#define REGISTER_GPU_SORTED_KERNELS(type, index_type) \ + REGISTER_KERNEL_BUILDER(Name("SegmentSum") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .TypeConstraint("Tindices"), \ + SegmentSumGPUOp) + +#define REGISTER_GPU_SORTED_KERNELS_ALL(type) \ + REGISTER_GPU_SORTED_KERNELS(type, int32); \ + REGISTER_GPU_SORTED_KERNELS(type, int64); + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_SORTED_KERNELS_ALL); +#undef REGISTER_GPU_SORTED_KERNELS +#undef REGISTER_GPU_SORTED_KERNELS_ALL +#endif // GOOGLE_CUDA + namespace functor { // UnsortedSegmentSumFunctor implementation for CPUDevice. diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index ee09c213b7..412c1d601d 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -26,6 +26,28 @@ namespace tensorflow { class OpKernelContext; namespace functor { + +#ifdef GOOGLE_CUDA +typedef Eigen::GpuDevice GPUDevice; +// Functor for SegmentSumGPUOp. +// 'output_rows': the number of output segments (unique segment ids in +// 'segment_ids'). +// 'segment_ids_shape': shape of 'segment_ids' tensor. +// 'segment_ids': unsorted map from input to output segment ids at which to +// perform segment sum operation. +// 'data_size': size of input data tensor. +// 'data': input data tensor. +// 'output': output reshaped to {output_rows, output.size/output_rows} +template +struct SegmentSumFunctor { + void operator()(OpKernelContext* ctx, const GPUDevice& d, + const Index output_rows, const TensorShape& segment_ids_shape, + typename TTypes::ConstFlat segment_ids, + const Index data_size, const T* data, + typename TTypes::Tensor output); +}; +#endif + // BaseFunctor for definition of UnsorteSegmentReductionOp // for usage without templates. template diff --git a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc index b132b1e8f8..26fcafee34 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc @@ -54,6 +54,77 @@ __device__ __forceinline__ void AccumulateInto( CudaAtomicAdd(dest_scalar + 1, value.imag()); } +// SortedSegmentSumFunctor kernel reduces input data just as +// UnsortedSegmentSumCustomKernel does except that input data +// is partitioned along the outer reduction dimension. This is +// because consecutive rows (elements in a row share the same +// outer dimension index) in the flattened 2D input data likely +// belong to the same segment in sorted segment sum operation. +// Therefore such partitioning strategy has two advantages over +// the UnsortedSegmentSumFunctor kernel: +// 1. Each thread reduces across multiple rows before writing +// answers to the global memory, we can therefore +// write reduction results to global memory less often. +// 2. We may know that the current thread is the only contributor +// to an output element because of the increasing nature of segment +// ids. In such cases, we do not need to use atomic operations +// to write results to global memory. +// In the flattened view of input data (with only outer and inner +// dimension), every thread processes a strip of input data of +// size OuterDimTileSize x 1. This strip runs across multiple +// rows of input data and all reduction elements share one inner +// dimension index. +template +__global__ void SortedSegmentSumCustomKernel(const Index input_outer_dim_size, + const Index inner_dim_size, + const Index output_outer_dim_size, + const Index* segment_ids, + const T* input, T* output, + const Index total_stripe_count) { + CUDA_1D_KERNEL_LOOP(stripe_index, total_stripe_count) { + const Index segment_offset = stripe_index % inner_dim_size; + const Index input_outer_dim_index_base = + stripe_index / inner_dim_size * Index(OuterDimTileSize); + + T sum = T(0); + Index first_segment_id = segment_ids[input_outer_dim_index_base]; + Index last_output_segment_id = output_outer_dim_size; + + const Index actual_stripe_height = + min(Index(OuterDimTileSize), + input_outer_dim_size - input_outer_dim_index_base); + for (Index j = 0; j < actual_stripe_height; j++) { + Index current_output_segment_id = + segment_ids[input_outer_dim_index_base + j]; + // Decide whether to write result to global memory. + // Result is only written to global memory if we move + // to another segment. Otherwise we can keep accumulating + // locally. + if (current_output_segment_id > last_output_segment_id) { + const Index output_index = + last_output_segment_id * inner_dim_size + segment_offset; + // decide whether to write result to global memory using atomic + // operations + if (last_output_segment_id == first_segment_id) { + AccumulateInto(output + output_index, sum); + } else { + *(output + output_index) = sum; + } + sum = T(0); + } + sum += ldg(input + (input_outer_dim_index_base + j) * inner_dim_size + + segment_offset); + last_output_segment_id = current_output_segment_id; + } + // For the last result in a strip, always write using atomic operations + // due to possible race conditions with threads computing + // the following strip. + const Index output_index = + last_output_segment_id * inner_dim_size + segment_offset; + AccumulateInto(output + output_index, sum); + } +} + // UnsortedSegmentSumFunctor kernel processes 'input_total_size' elements. // Each element is mapped from input to output by a combination of its // 'segment_ids' mapping and 'inner_dim_size'. @@ -80,6 +151,47 @@ __global__ void UnsortedSegmentSumCustomKernel( namespace functor { +template +void SegmentSumFunctor::operator()( + OpKernelContext* ctx, const GPUDevice& d, const Index output_rows, + const TensorShape& segment_ids_shape, + typename TTypes::ConstFlat segment_ids, const Index data_size, + const T* data, typename TTypes::Tensor output) { + if (output.size() == 0) { + return; + } + // Set 'output' to zeros. + CudaLaunchConfig config = GetCudaLaunchConfig(output.size(), d); + SetZero<<>>( + output.size(), output.data()); + if (data_size == 0 || segment_ids_shape.num_elements() == 0) { + return; + } + + // Launch kernel to compute sorted segment sum. + // Notes: + // *) 'input_total_size' is the total number of elements to process. + // *) 'segment_ids.shape' is a prefix of data's shape. + // *) 'input_outer_dim_size' is the total number of segments to process. + const Index input_total_size = data_size; + const Index input_outer_dim_size = segment_ids.dimension(0); + const Index input_inner_dim_size = input_total_size / input_outer_dim_size; + + const int OuterDimTileSize = 8; + + const Index input_outer_dim_num_stripe = + Eigen::divup(input_outer_dim_size, Index(OuterDimTileSize)); + + const Index total_stripe_count = + input_inner_dim_size * input_outer_dim_num_stripe; + + config = GetCudaLaunchConfig(total_stripe_count, d); + SortedSegmentSumCustomKernel<<< + config.block_count, config.thread_per_block, 0, d.stream()>>>( + input_outer_dim_size, input_inner_dim_size, output_rows, + segment_ids.data(), data, output.data(), total_stripe_count); +}; + // UnsortedSegmentSumFunctor implementation for GPUDevice. template struct UnsortedSegmentSumFunctor: UnsortedSegmentBaseFunctor { @@ -117,6 +229,15 @@ struct UnsortedSegmentSumFunctor: UnsortedSegmentBaseFuncto } }; +#define DEFINE_SORTED_GPU_SPECS_INDEX(T, Index) \ + template struct SegmentSumFunctor + +#define DEFINE_SORTED_GPU_SPECS(T) \ + DEFINE_SORTED_GPU_SPECS_INDEX(T, int32); \ + DEFINE_SORTED_GPU_SPECS_INDEX(T, int64); + +TF_CALL_GPU_NUMBER_TYPES(DEFINE_SORTED_GPU_SPECS); + #define DEFINE_GPU_SPECS_INDEX(T, Index) \ template struct UnsortedSegmentSumFunctor diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index e432998c21..9538be95e3 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -683,13 +683,15 @@ cuda_py_test( tf_py_test( name = "segment_reduction_ops_test", - size = "small", + size = "medium", srcs = ["segment_reduction_ops_test.py"], additional_deps = [ "//third_party/py/numpy", + "//tensorflow/python:client", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:math_ops", + "//tensorflow/python:variables", "//tensorflow/python:nn_grad", ], ) diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 33269c9123..5e426fc61a 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -18,12 +18,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import itertools + import numpy as np +from tensorflow.python.client import session +from tensorflow.python.framework import ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test @@ -107,19 +112,19 @@ class SegmentReductionOpTest(SegmentReductionHelper): curr_ops_list = complex_ops_list else: curr_ops_list = ops_list - - with self.test_session(use_gpu=False): - tf_x, np_x = self._input(shape, dtype=dtype) - for np_op1, np_op2, tf_op in curr_ops_list: - np_ans = self._segmentReduce(indices, np_x, np_op1, np_op2) - s = tf_op(data=tf_x, segment_ids=indices) - tf_ans = s.eval() - self.assertAllClose(np_ans, tf_ans) - # NOTE(mrry): The static shape inference that computes - # `tf_ans.shape` can only infer that sizes from dimension 1 - # onwards, because the size of dimension 0 is data-dependent - # and may therefore vary dynamically. - self.assertAllEqual(np_ans.shape[1:], tf_ans.shape[1:]) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, np_x = self._input(shape, dtype=dtype) + for np_op1, np_op2, tf_op in curr_ops_list: + np_ans = self._segmentReduce(indices, np_x, np_op1, np_op2) + s = tf_op(data=tf_x, segment_ids=indices) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) + # NOTE(mrry): The static shape inference that computes + # `tf_ans.shape` can only infer that sizes from dimension 1 + # onwards, because the size of dimension 0 is data-dependent + # and may therefore vary dynamically. + self.assertAllEqual(np_ans.shape[1:], tf_ans.shape[1:]) def testSegmentIdsShape(self): shape = [4, 4] @@ -130,41 +135,45 @@ class SegmentReductionOpTest(SegmentReductionHelper): def testSegmentIdsSize(self): shape = [4, 4] - with self.test_session(): - tf_x, _ = self._input(shape) - indices = [0, 1] - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - with self.assertRaisesOpError("segment_ids should be the same size"): - s.eval() + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, _ = self._input(shape) + indices = [0, 1] + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + with self.assertRaisesOpError("segment_ids should be the same size"): + s.eval() def testSegmentIdsValid(self): # This is a baseline for the following SegmentIdsInvalid* tests. shape = [4, 4] - with self.test_session(): - tf_x, _ = self._input(shape) - indices = [0, 0, 0, 1] - result = math_ops.segment_sum(data=tf_x, segment_ids=indices).eval() - self.assertAllEqual([[15, 18, 21, 24], [13, 14, 15, 16]], result) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, _ = self._input(shape, dtype=dtypes_lib.float32) + indices = [0, 0, 0, 1] + result = math_ops.segment_sum(data=tf_x, segment_ids=indices).eval() + self.assertAllEqual([[15, 18, 21, 24], [13, 14, 15, 16]], result) def testSegmentIdsGreaterThanZero(self): shape = [4, 4] - with self.test_session(): - tf_x, np_x = self._input(shape) - indices = [1, 1, 2, 2] - np_ans = self._segmentReduce(indices, np_x, np.add) - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - tf_ans = s.eval() - self.assertAllClose(np_ans, tf_ans) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, np_x = self._input(shape, dtype=dtypes_lib.float32) + indices = [1, 1, 2, 2] + np_ans = self._segmentReduce(indices, np_x, np.add) + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) def testSegmentIdsHole(self): shape = [4, 4] - with self.test_session(): - tf_x, np_x = self._input(shape) - indices = [0, 0, 3, 3] - np_ans = self._segmentReduce(indices, np_x, np.add) - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - tf_ans = s.eval() - self.assertAllClose(np_ans, tf_ans) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, np_x = self._input(shape, dtype=dtypes_lib.float32) + indices = [0, 0, 3, 3] + np_ans = self._segmentReduce(indices, np_x, np.add) + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) def testSegmentIdsInvalid1(self): shape = [4, 4] @@ -199,21 +208,23 @@ class SegmentReductionOpTest(SegmentReductionHelper): def testSegmentIdsInvalid4(self): shape = [4, 4] - with self.test_session(): - tf_x, _ = self._input(shape) - indices = [0, 0, 0, -1] - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - with self.assertRaisesOpError("segment ids must be >= 0"): - s.eval() + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, _ = self._input(shape, dtype=dtypes_lib.float32) + indices = [0, 0, 0, -1] + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + with self.assertRaisesOpError("segment ids must be >= 0"): + s.eval() def testSegmentIdsInvalid5(self): shape = [4, 4] - with self.test_session(): - tf_x, _ = self._input(shape) - indices = [0, 0, 0, -2] - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - with self.assertRaisesOpError("segment ids must be >= 0"): - s.eval() + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, _ = self._input(shape, dtype=dtypes_lib.float32) + indices = [0, 0, 0, -2] + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + with self.assertRaisesOpError("segment ids must be >= 0"): + s.eval() def testGradient(self): shape = [4, 4] @@ -341,7 +352,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): with self.test_session(use_gpu=True): tf_x, np_x = self._input(shape, dtype=dtypes_lib.float64) s = math_ops.unsorted_segment_max(data=tf_x, segment_ids=indices, - num_segments=num_segments) + num_segments=num_segments) jacob_t, jacob_n = gradient_checker.compute_gradient( tf_x, shape, @@ -635,6 +646,64 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper): with self.assertRaisesOpError(r"Segment id 0 out of range \[0, 0\)"): s.eval() +class SegmentReductionOpBenchmark(test.Benchmark): + outer_dim_options = [2**x for x in range(9, 14, 2)] + ratio_options = [2**x for x in range(1, 6, 2)] + inner_dim_options = [2**x for x in range(9, 14, 2)] + #randomly generated sizes with less alignments + inner_dim_options += [1120, 1215, 1856, 1302, 1329, 1531, 1313, 1672, 1851, 1584] + dtype_options = [np.float32, np.float64] + options = (outer_dim_options, + ratio_options, inner_dim_options, dtype_options) + op_functors = [lambda vc, vs, seg_ids: + ("sorted", math_ops.segment_sum(vc, vs)), + lambda vc, vs, seg_ids: + ("unsorted", math_ops.unsorted_segment_sum(vc, vs, seg_ids[-1]+1))] + repeat = 10 + + def _npTypeToStr(self, t): + if t == np.float32: + return "fp32" + if t == np.float64: + return "fp64" + + def _runGraph(self, op_functor, outer_dim, ratio, inner_dim, dtype): + output_outer_dim = int(outer_dim/ratio) + const = np.random.randint(5, size=(outer_dim, inner_dim)) + seg_ids = np.sort(np.random.randint( + output_outer_dim, size=outer_dim)) + vs = variables.Variable(seg_ids.astype(np.int32)) + with ops.device("/gpu:0"): + vc = variables.Variable(const.astype(dtype)) + name, op = op_functor(vc, vs, seg_ids) + with session.Session() as sess: + variables.global_variables_initializer().run() + r = self.run_op_benchmark(sess, op, min_iters=self.repeat, + name="_".join(map(str, + [name, + outer_dim, + ratio, + inner_dim, + self._npTypeToStr(dtype)]))) + return name, r["wall_time"] + + def benchmarkSegmentSumGPU(self): + if not test.is_gpu_available(cuda_only=True): + return + for outer_dim, ratio, inner_dim, dtype in itertools.product(*self.options): + output_outer_dim = int(outer_dim/ratio) + op_functor = self.op_functors[0] + with ops.Graph().as_default(): + self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype) + + def benchmarkUnsortedSegmentSumGPU(self): + if not test.is_gpu_available(cuda_only=True): + return + for outer_dim, ratio, inner_dim, dtype in itertools.product(*self.options): + output_outer_dim = int(outer_dim/ratio) + op_functor = self.op_functors[1] + with ops.Graph().as_default(): + self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype) if __name__ == "__main__": test.main() -- GitLab From 9819cba65b14966a9b48faf63d74b09c0e727ae4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 09:33:24 -0700 Subject: [PATCH 261/294] Deploy maven artifacts to Bintray as well. PiperOrigin-RevId: 167589397 --- tensorflow/java/maven/README.md | 146 +++++++++++++----- tensorflow/java/maven/pom.xml | 52 ++++--- tensorflow/java/maven/release.sh | 2 + tensorflow/java/maven/run_inside_container.sh | 81 ++++++++-- 4 files changed, 205 insertions(+), 76 deletions(-) diff --git a/tensorflow/java/maven/README.md b/tensorflow/java/maven/README.md index 17bb799961..6227775361 100644 --- a/tensorflow/java/maven/README.md +++ b/tensorflow/java/maven/README.md @@ -1,11 +1,13 @@ # TensorFlow for Java using Maven -The [TensorFlow Java -API](https://www.tensorflow.org/api_docs/java/reference/org/tensorflow/package-summary) -is available through artifacts uploaded to [Maven -Central](https://oss.sonatype.org/content/repositories/snapshots/org/tensorflow/). -This document describes the process of updating the release artifacts. It does -_not_ describe how to use the artifacts, for which the reader is referred to the +The +[TensorFlow Java API](https://www.tensorflow.org/api_docs/java/reference/org/tensorflow/package-summary) +is available on Maven Central and JCenter through artifacts uploaded to +[OSS Sonatype](https://oss.sonatype.org/content/repositories/releases/org/tensorflow/) and +[Bintray](https://bintray.com/google/tensorflow/tensorflow) respectively. This +document describes the process of updating the release artifacts. It does _not_ +describe how to use the artifacts, for which the reader is referred to +the [TensorFlow for Java installation instructions](https://www.tensorflow.org/code/tensorflow/java/README.md). ## Background @@ -20,7 +22,7 @@ Hence, the process for building and uploading release artifacts is not a single ## Artifact Structure -There are five artifacts and thus `pom.xml`s involved in this release: +There are six artifacts and thus `pom.xml`s involved in this release: 1. `tensorflow`: The single dependency for projects requiring TensorFlow for Java. This convenience package depends on the two below, and is the one that @@ -37,16 +39,22 @@ There are five artifacts and thus `pom.xml`s involved in this release: 4. `proto`: Generated Java code for TensorFlow protocol buffers (e.g., `MetaGraphDef`, `ConfigProto` etc.) -5. [`parentpom`](https://maven.apache.org/pom/index.html): Common settings +5. `tensorflow-android`: A package geared towards + supporting [TensorFlow on Android](../../contrib/android/README.md), and is + a self-contained Android AAR library containing all necessary native and + Java code. + +6. [`parentpom`](https://maven.apache.org/pom/index.html): Common settings shared by all of the above. + ## Updating the release -The TensorFlow artifacts at Maven Central are created from files built as part -of the TensorFlow release process (which uses `bazel`). The author's lack of -familiarity with Maven best practices combined with the use of a different build -system means that this process is possibly not ideal, but it's what we've got. -Suggestions are welcome. +The Maven artifacts are created from files built as part of the TensorFlow +release process (which uses `bazel`). The author's lack of familiarity with +Maven best practices combined with the use of a different build system means +that this process is possibly not ideal, but it's what we've got. Suggestions +are welcome. In order to isolate the environment used for building, all release processes are conducted in a [Docker](https://www.docker.com) container. @@ -59,16 +67,28 @@ conducted in a [Docker](https://www.docker.com) container. account does not have permissions, then you'll need to ask someone who does to [file a ticket](https://issues.sonatype.org/) to add to the permissions ([sample ticket](https://issues.sonatype.org/browse/MVNCENTRAL-1637)). -- A GPG signing key, required [to sign the release artifacts](http://central.sonatype.org/pages/apache-maven.html#gpg-signed-components). - -### Deploying to Maven Central - -1. Create a file with your OSSRH credentials (or perhaps you use `mvn` and have - it in `~/.m2/settings.xml`): +- An account at [bintray.com](https://bintray.com) that has permissions to + update the [tensorflow repository](https://bintray.com/google/tensorflow). + If your account does not have permissions, then you'll need to ask one of + the [organization administrators](https://bintray.com/google) to give you + permissions to update the `tensorflow` repository. Please keep the + [repository option](https://bintray.com/google/tensorflow/edit?tab=general) + to *"GPG sign uploaded files using Bintray's public/private key pair"* + **unchecked**, otherwise it will conflict with locally signed artifacts. +- A GPG signing key, required + [to sign the release artifacts](http://central.sonatype.org/pages/apache-maven.html#gpg-signed-components). + +### Deploying to Sonatype and Bintray + +1. Create a file with your OSSRH credentials and + [Bintray API key](https://bintray.com/docs/usermanual/interacting/interacting_interacting.html#anchorAPIKEY) + (or perhaps you use `mvn` and have it in `~/.m2/settings.xml`): ```sh SONATYPE_USERNAME="your_sonatype.org_username_here" SONATYPE_PASSWORD="your_sonatype.org_password_here" + BINTRAY_USERNAME="your_bintray_username_here" + BINTRAY_API_KEY="your_bintray_api_key_here" GPG_PASSPHRASE="your_gpg_passphrase_here" cat >/tmp/settings.xml < @@ -78,19 +98,16 @@ conducted in a [Docker](https://www.docker.com) container. ${SONATYPE_USERNAME} ${SONATYPE_PASSWORD} + + bintray + ${BINTRAY_USERNAME} + ${BINTRAY_API_KEY} + - - - ossrh - - true - - - gpg2 - ${GPG_PASSPHRASE} - - - + + gpg2 + ${GPG_PASSPHRASE} + EOF ``` @@ -98,30 +115,83 @@ conducted in a [Docker](https://www.docker.com) container. 2. Run the `release.sh` script. 3. If the script above succeeds then the artifacts would have been uploaded to - the private staging repository. After verifying the release, visit - https://oss.sonatype.org/#stagingRepositories, find the `org.tensorflow` + the private staging repository in Sonatype, and as unpublished artifacts in + Bintray. After verifying the release, you should finalize or abort the + release on both sites. + +4. Visit https://oss.sonatype.org/#stagingRepositories, find the `org.tensorflow` release and click on either `Release` to finalize the release, or `Drop` to - abort. Some things of note: + abort. + +5. Visit https://bintray.com/google/tensorflow/tensorflow, and select the + version you just uploaded. Notice there's a message about unpublished + artifacts. Click on either `Publish` to finalize the release, or `Discard` + to abort. +6. Some things of note: - For details, look at the [Sonatype guide](http://central.sonatype.org/pages/releasing-the-deployment.html). - Syncing with [Maven Central](http://repo1.maven.org/maven2/org/tensorflow/) can take 10 minutes to 2 hours (as per the [OSSRH guide](http://central.sonatype.org/pages/ossrh-guide.html#releasing-to-central)). + - For Bintray details, refer to their guide on + [managing uploaded content](https://bintray.com/docs/usermanual/uploads/uploads_managinguploadedcontent.html#_publishing). -4. Upon successful release, commit changes to all the `pom.xml` files +7. Upon successful release, commit changes to all the `pom.xml` files (which should have the updated version number). ### Snapshots If the `TF_VERSION` provided to the `release.sh` script ends in `-SNAPSHOT`, then instead of using official release files, the nightly build artifacts from -https://ci.tensorflow.org/view/Nightly/job/nightly-libtensorflow/ and -https://ci.tensorflow.org/view/Nightly/job/nightly-libtensorflow-windows/ will -be used to upload to the Maven Central snapshots repository. +https://ci.tensorflow.org/view/Nightly/job/nightly-libtensorflow/, +https://ci.tensorflow.org/view/Nightly/job/nightly-libtensorflow-windows/ and +https://ci.tensorflow.org/view/Nightly/job/nightly-android +will be used to upload to the Maven Central snapshots repository. (Note that +snapshots are only uploaded to Maven Central, not Bintray.) + +### Skip deploying to a repository + +Should you need, setting environment variables `DEPLOY_OSSRH=0` or +`DEPLOY_BINTRAY=0` when calling `release.sh` will skip deploying to OSSRH or +Bintray respectively. Note that snapshots are only uploaded to OSSRH, so you +cannot skip deploying to OSSRH for a `-SNAPSHOT` version. + +## The overall flow + +This section provides some pointers around how artifacts are currently +assembled. + +All native and java code is first built and tested on +a [Tensorflow Jenkins server](https://ci.tensorflow.org/) which run various +scripts under the [`tools/ci_build`](../../tools/ci_build/) directory. Of +particular interest may be `tools/ci_build/builds/libtensorflow.sh` which +bundles Java-related build sources and outputs into archives, and +`tools/ci_build/builds/android_full.sh` which produces an Android AAR package. + +Maven artifacts however are not created in Jenkins. Instead, artifacts are +created and deployed externally on-demand, when a maintainer runs the +`release.sh` script. + +This script spins up a Docker instance which downloads the archives created by +successful runs of various `tools/ci_build` scripts on the Tensorflow Jenkins +server. + +It organizes these archives locally into a maven-friendly layout, and runs `mvn +deploy` to create maven artifacts within the container. Native libraries built +in Jenkins are used as-is, but srcjars for java code are used to compile class +files and generate javadocs.) It also downloads the Android AAR from the Jenkins +server and directly deploys it via `mvn gpg:sign-and-deploy-file`. + +`release.sh` then stages these artifacts to OSSRH and Bintray, and if all goes +well a maintainer can log into both sites to promote them as a new release. +There is a small change to the flow for a standard (rather than a `-SNAPSHOT`) +release. Rather than downloading archives directly from jobs on the Jenkins +server, the script uses a static repository of QA-blessed archives. ## References - [Sonatype guide](http://central.sonatype.org/pages/ossrh-guide.html) for hosting releases. - [Ticket that created the `org/tensorflow` configuration](https://issues.sonatype.org/browse/OSSRH-28072) on OSSRH. +- The [Bintray User Manual](https://bintray.com/docs/usermanual/index.html) diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index cc4fbc4a75..0a3552d756 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -33,18 +33,35 @@ proto - - - - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - - + + + ossrh - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - - + + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + ossrh + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + bintray + + + + bintray + https://api.bintray.com/maven/google/tensorflow/tensorflow/;publish=0 + + + + @@ -55,19 +72,6 @@ - - - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.7 - true - - ossrh - https://oss.sonatype.org/ - - false - - org.apache.maven.plugins diff --git a/tensorflow/java/maven/release.sh b/tensorflow/java/maven/release.sh index b95a4d4674..9012ea14ea 100755 --- a/tensorflow/java/maven/release.sh +++ b/tensorflow/java/maven/release.sh @@ -49,6 +49,8 @@ fi set -ex docker run \ -e TF_VERSION="${TF_VERSION}" \ + -e DEPLOY_OSSRH="${DEPLOY_OSSRH:-true}" \ + -e DEPLOY_BINTRAY="${DEPLOY_BINTRAY:-true}" \ -v ${PWD}:/tensorflow \ -v "${SETTINGS_XML}":/root/.m2/settings.xml \ -v ${HOME}/.gnupg:/root/.gnupg \ diff --git a/tensorflow/java/maven/run_inside_container.sh b/tensorflow/java/maven/run_inside_container.sh index 6b4d5d7032..a2ce097195 100644 --- a/tensorflow/java/maven/run_inside_container.sh +++ b/tensorflow/java/maven/run_inside_container.sh @@ -19,11 +19,23 @@ RELEASE_URL_PREFIX="https://storage.googleapis.com/tensorflow/libtensorflow" + +# By default we deploy to both ossrh and bintray. These two +# environment variables can be set to skip either repository. +DEPLOY_BINTRAY="${DEPLOY_BINTRAY:-true}" +DEPLOY_OSSRH="${DEPLOY_OSSRH:-true}" + IS_SNAPSHOT="false" if [[ "${TF_VERSION}" == *"-SNAPSHOT" ]]; then IS_SNAPSHOT="true" + # Bintray does not allow snapshots. + DEPLOY_BINTRAY="false" fi PROTOC_RELEASE_URL="https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip" +if [[ "${DEPLOY_BINTRAY}" != "true" && "${DEPLOY_OSSRH}" != "true" ]]; then + echo "Must deploy to at least one of Bintray or OSSRH" >&2 + exit 2 +fi set -ex @@ -39,6 +51,20 @@ update_version_in_pom() { mvn versions:set -DnewVersion="${TF_VERSION}" } +# Fetch a property from pom files for a given profile. +# Arguments: +# profile - name of the selected profile. +# property - name of the property to be retrieved. +# Output: +# Echo property value to stdout +mvn_property() { + local profile="$1" + local prop="$2" + mvn -q --non-recursive exec:exec -P "${profile}" \ + -Dexec.executable='echo' \ + -Dexec.args="\${${prop}}" +} + download_libtensorflow() { if [[ "${IS_SNAPSHOT}" == "true" ]]; then URL="http://ci.tensorflow.org/view/Nightly/job/nightly-libtensorflow/TYPE=cpu-slave/lastSuccessfulBuild/artifact/lib_package/libtensorflow-src.jar" @@ -137,29 +163,50 @@ generate_java_protos() { rm -rf "${DIR}/proto/tmp" } +# Deploy artifacts using a specific profile. +# Arguments: +# profile - name of selected profile. +# Outputs: +# n/a +deploy_profile() { + local profile="$1" + # Deploy the non-android pieces. + mvn deploy -P"${profile}" + # Determine the correct pom file property to use + # for the repository url. + local rtype + if [[ "${IS_SNAPSHOT}" == "true" ]]; then + rtype='snapshotRepository' + else + rtype='repository' + fi + local url=$(mvn_property "${profile}" "project.distributionManagement.${rtype}.url") + local repositoryId=$(mvn_property "${profile}" "project.distributionManagement.${rtype}.id") + mvn gpg:sign-and-deploy-file \ + -Dfile="${DIR}/tensorflow-android/target/tensorflow.aar" \ + -DpomFile="${DIR}/tensorflow-android/target/pom-android.xml" \ + -Durl="${url}" \ + -DrepositoryId="${repositoryId}" +} + # If successfully built, try to deploy. # If successfully deployed, clean. # If deployment fails, debug with # ./release.sh ${TF_VERSION} ${SETTINGS_XML} bash # To get a shell to poke around the maven artifacts with. deploy_artifacts() { - # This deploys the non-android pieces - mvn deploy - - # Sign and deploy the previously downloaded aar file as a single - # maven artifact. - if [[ "${IS_SNAPSHOT}" == "true" ]]; then - REPO="https://oss.sonatype.org/content/repositories/snapshots" - else - REPO="https://oss.sonatype.org/service/local/staging/deploy/maven2/" + # Deploy artifacts to ossrh if requested. + if [[ "${DEPLOY_OSSRH}" == "true" ]]; then + deploy_profile 'ossrh' + fi + # Deploy artifacts to bintray if requested. + if [[ "${DEPLOY_BINTRAY}" == "true" ]]; then + deploy_profile 'bintray' fi - mvn gpg:sign-and-deploy-file -Dfile="${DIR}/tensorflow-android/target/tensorflow.aar" -DpomFile="${DIR}/tensorflow-android/target/pom-android.xml" -Durl=${REPO} -DrepositoryId=ossrh - # Clean up when everything works clean } - if [ -z "${TF_VERSION}" ] then echo "Must set the TF_VERSION environment variable" @@ -189,8 +236,14 @@ set +ex if [[ "${IS_SNAPSHOT}" == "false" ]]; then echo "Uploaded to the staging repository" echo "After validating the release: " - echo "1. Login to https://oss.sonatype.org/#stagingRepositories" - echo "2. Find the 'org.tensorflow' staging release and click either 'Release' to release or 'Drop' to abort" + if [[ "${DEPLOY_OSSRH}" == "true" ]]; then + echo "* Login to https://oss.sonatype.org/#stagingRepositories" + echo "* Find the 'org.tensorflow' staging release and click either 'Release' to release or 'Drop' to abort" + fi + if [[ "${DEPLOY_BINTRAY}" == "true" ]]; then + echo "* Login to https://bintray.com/google/tensorflow/tensorflow" + echo "* Either 'Publish' unpublished items to release, or 'Discard' to abort" + fi else echo "Uploaded to the snapshot repository" fi -- GitLab From a3ee46fb710c75f6f52271463f5efe758306377b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 09:56:43 -0700 Subject: [PATCH 262/294] Enable contrib.nccl test. PiperOrigin-RevId: 167592162 --- tensorflow/contrib/nccl/BUILD | 41 ++++++++++--------- .../contrib/nccl/python/ops/nccl_ops_test.py | 9 ++-- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index 338181e4ca..d6508362b8 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -48,6 +48,8 @@ tf_cuda_cc_test( # Disabled on jenkins until errors finding nvmlShutdown are found. tags = [ "manual", + "no_oss", + "noguitar", # note: is run manually there "notap", ], deps = if_cuda( @@ -112,25 +114,26 @@ tf_custom_op_py_library( ], ) -# http://b/62064807 -# cuda_py_test( -# name = "nccl_ops_test", -# size = "small", -# srcs = ["python/ops/nccl_ops_test.py"], -# additional_deps = [ -# ":nccl_py", -# "//tensorflow/python:array_ops", -# "//tensorflow/python:client_testlib", -# "//tensorflow/python:framework_for_generated_wrappers", -# "//tensorflow/python:framework_test_lib", -# "//tensorflow/python:platform_test", -# ], -# # Disabled on jenkins until errors finding nvmlShutdown are found. -# tags = [ -# "manual", -# "notap", -# ], -# ) +cuda_py_test( + name = "nccl_ops_test", + size = "small", + srcs = ["python/ops/nccl_ops_test.py"], + additional_deps = [ + ":nccl_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], + # Disabled on jenkins until errors finding nvmlShutdown are found. + tags = [ + "manual", + "no_oss", + "noguitar", # note: is run manually there + "notap", + ], +) filegroup( name = "all_files", diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py index ae658e7322..1621e9f28e 100644 --- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py +++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py @@ -43,7 +43,8 @@ class AllReduceTest(test.TestCase): self._testSingleAllReduce(sess, dtype, nccl.all_max, np.maximum) def _testSingleAllReduce(self, sess, np_type, nccl_fn, numpy_accumulation_fn): - for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]: + for devices in [['/device:GPU:1', '/device:GPU:2', '/device:GPU:0'], + ['/device:GPU:1', '/device:GPU:0']]: shape = (3, 4) np_ans = None tensors = [] @@ -84,7 +85,8 @@ class BroadcastTest(test.TestCase): # Create session inside outer loop to test use of # same communicator across multiple sessions. with self.test_session(use_gpu=True) as sess: - for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]: + for devices in [['/device:GPU:1', '/device:GPU:0', '/device:GPU:2'], + ['/device:GPU:1', '/device:GPU:0']]: shape = (3, 4) sender = np.random.randint(0, len(devices) - 1) with ops.device(devices[sender]): @@ -115,7 +117,8 @@ class CombinedTest(test.TestCase): # Create session inside outer loop to test use of # same communicator across multiple sessions. with self.test_session(use_gpu=True) as sess: - for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]: + for devices in [['/device:GPU:1', '/device:GPU:2', '/device:GPU:0'], + ['/device:GPU:0', '/device:GPU:1']]: shape = (3, 4) # all-reduce -- GitLab From 370c9b8ae787cf56cf5bc5586876fac50867edb4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 10:01:23 -0700 Subject: [PATCH 263/294] Unify usage of eager/graph cell and embedding_lookup PiperOrigin-RevId: 167592814 --- tensorflow/core/kernels/resource_variable_ops.cc | 9 +++++++++ tensorflow/python/ops/rnn_cell_impl.py | 6 ++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index 12b10d61da..98f3718c12 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -431,7 +431,16 @@ class ResourceGatherOp : public OpKernel { TF_CALL_ALL_TYPES(REGISTER_GATHER_CPU); TF_CALL_QUANTIZED_TYPES(REGISTER_GATHER_CPU); +// Registers GPU kernels. +#if GOOGLE_CUDA +#define REGISTER_GATHER_GPU(type) REGISTER_GATHER_ALL_INDICES(GPU, type) + +TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_GATHER_GPU); + +#endif // GOOGLE_CUDA + #undef REGISTER_GATHER_CPU +#undef REGISTER_GATHER_GPU #undef REGISTER_GATHER_ALL_INDICES #undef REGISTER_GATHER_FULL diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index b1626feb27..25a0ad0a37 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -28,6 +28,7 @@ import collections import hashlib import numbers +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -124,9 +125,10 @@ def _zero_state_tensors(state_size, batch_size, dtype): def get_state_shape(s): """Combine s with batch_size to get a proper tensor shape.""" c = _concat(batch_size, s) - c_static = _concat(batch_size, s, static=True) size = array_ops.zeros(c, dtype=dtype) - size.set_shape(c_static) + if context.in_graph_mode(): + c_static = _concat(batch_size, s, static=True) + size.set_shape(c_static) return size return nest.map_structure(get_state_shape, state_size) -- GitLab From 9eb8dd7d64d36f6ae0b268098597994bb7e6ffbc Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Tue, 5 Sep 2017 10:02:11 -0700 Subject: [PATCH 264/294] Mark tf.contrib.stateless.* ops as NotDifferentiable PiperOrigin-RevId: 167592965 --- tensorflow/contrib/stateless/BUILD | 1 + tensorflow/contrib/stateless/__init__.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/contrib/stateless/BUILD b/tensorflow/contrib/stateless/BUILD index 598e6513ae..865fb72a55 100644 --- a/tensorflow/contrib/stateless/BUILD +++ b/tensorflow/contrib/stateless/BUILD @@ -21,6 +21,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":stateless_random_ops", + "//tensorflow/python:framework", "//tensorflow/python:util", ], ) diff --git a/tensorflow/contrib/stateless/__init__.py b/tensorflow/contrib/stateless/__init__.py index 82e5d36ce4..ca937546f5 100644 --- a/tensorflow/contrib/stateless/__init__.py +++ b/tensorflow/contrib/stateless/__init__.py @@ -34,5 +34,11 @@ from __future__ import print_function # pylint: disable=wildcard-import from tensorflow.contrib.stateless.gen_stateless_random_ops import * +from tensorflow.python.framework import ops from tensorflow.python.util.all_util import remove_undocumented + +ops.NotDifferentiable("StatelessRandomNormal") +ops.NotDifferentiable("StatelessRandomUniform") +ops.NotDifferentiable("StatelessTruncatedNormal") + remove_undocumented(__name__) -- GitLab From 6a7bade1e62fbc7d800643fbc3f81d77036d8f90 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Tue, 5 Sep 2017 10:15:51 -0700 Subject: [PATCH 265/294] Error out when TPUEstimator is passed to Experiment. PiperOrigin-RevId: 167594863 --- tensorflow/contrib/learn/BUILD | 1 + .../contrib/learn/python/learn/experiment.py | 9 +++++++++ .../learn/python/learn/experiment_test.py | 16 ++++++++++++++++ tensorflow/contrib/tpu/BUILD | 1 - 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 6fbe204ec6..db3be9a991 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -36,6 +36,7 @@ py_library( "//tensorflow/contrib/rnn:rnn_py", "//tensorflow/contrib/session_bundle:exporter", "//tensorflow/contrib/session_bundle:gc", + "//tensorflow/contrib/tpu:tpu_estimator", "//tensorflow/contrib/training:training_py", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index c35a493086..627d4991f0 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -33,6 +33,7 @@ from tensorflow.contrib.learn.python.learn import export_strategy from tensorflow.contrib.learn.python.learn import monitors from tensorflow.contrib.learn.python.learn import trainable from tensorflow.contrib.learn.python.learn.estimators import run_config +from tensorflow.contrib.tpu.python.tpu import tpu_estimator from tensorflow.python.estimator import estimator as core_estimator from tensorflow.python.framework import ops from tensorflow.python.platform import tf_logging as logging @@ -221,6 +222,14 @@ class Experiment(object): "`estimator` must implement `tf.contrib.learn.Trainable`" "or `tf.estimator.`Estimator`.") + if isinstance(estimator, tpu_estimator.TPUEstimator): + raise ValueError( + "`Experiment` class cannot work with `tf.contrib.tpu.TPUEstimator`. " + "Please call `TPUEstimator` train/evaluate directly. \n" + "Details: `Experiment` class is designed for between-graph " + "distributed training, while `TPUEstimator` is working in in-graph " + "distributed mode.") + super(Experiment, self).__init__() # Immutable fields. self._estimator = estimator diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py index fe40d27c44..2c68edbb34 100644 --- a/tensorflow/contrib/learn/python/learn/experiment_test.py +++ b/tensorflow/contrib/learn/python/learn/experiment_test.py @@ -32,6 +32,8 @@ from tensorflow.contrib.learn.python.learn.estimators import dnn from tensorflow.contrib.learn.python.learn.estimators import run_config as run_config_lib from tensorflow.contrib.learn.python.learn.estimators import test_data from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils +from tensorflow.contrib.tpu.python.tpu import tpu_config +from tensorflow.contrib.tpu.python.tpu import tpu_estimator from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.estimator import estimator as core_estimator @@ -935,6 +937,20 @@ class ExperimentTest(test.TestCase): self.assertEqual(ex._maybe_export.call_count, 4) self.assertEqual(ex._call_evaluate.call_count, 4) + def test_fail_with_tpu_estimator(self): + def dummy_model_fn(features, labels): + del features, labels # unused + + with self.assertRaisesRegexp( + ValueError, + '`Experiment` class cannot work with `tf.contrib.tpu.TPUEstimator`'): + experiment.Experiment( + tpu_estimator.TPUEstimator(model_fn=dummy_model_fn, + config=tpu_config.RunConfig(), + train_batch_size=256), + train_input_fn='train_input', + eval_input_fn='eval_input') + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 7d1325e046..c952288704 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -39,7 +39,6 @@ py_library( deps = [ ":tpu_lib", ":tpu_py", - "//tensorflow/contrib/learn", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_for_generated_wrappers", -- GitLab From c1558f738fc269db521c9b00379cb4e7228ce0a3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 10:31:36 -0700 Subject: [PATCH 266/294] Fix parsing of SequenceExample for FixedLenSequenceFeature in FeatureColumn. PiperOrigin-RevId: 167597172 --- tensorflow/contrib/layers/python/layers/feature_column.py | 6 +++--- .../contrib/layers/python/layers/feature_column_test.py | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py index 598d9aee02..da16bf6ce6 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column.py +++ b/tensorflow/contrib/layers/python/layers/feature_column.py @@ -2559,10 +2559,10 @@ def _create_sequence_feature_spec_for_parsing(sequence_feature_columns, feature_spec = create_feature_spec_for_parsing(sequence_feature_columns) sequence_feature_spec = {} for key, feature in feature_spec.items(): - if (isinstance(feature, parsing_ops.VarLenFeature) or - isinstance(feature, parsing_ops.FixedLenSequenceFeature)): + if isinstance(feature, parsing_ops.VarLenFeature): sequence_feature = feature - elif isinstance(feature, parsing_ops.FixedLenFeature): + elif (isinstance(feature, parsing_ops.FixedLenFeature) or + isinstance(feature, parsing_ops.FixedLenSequenceFeature)): default_is_set = feature.default_value is not None if default_is_set: logging.warning( diff --git a/tensorflow/contrib/layers/python/layers/feature_column_test.py b/tensorflow/contrib/layers/python/layers/feature_column_test.py index 21ab986710..ab65e47af8 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column_test.py +++ b/tensorflow/contrib/layers/python/layers/feature_column_test.py @@ -912,8 +912,7 @@ class FeatureColumnTest(test.TestCase): parsing_ops.VarLenFeature(dtype=dtypes.float32), "real_valued_var_len_dense_column": parsing_ops.FixedLenSequenceFeature( - shape=[], dtype=dtypes.float32, allow_missing=True, - default_value=4.0), + shape=[], dtype=dtypes.float32, allow_missing=True), } self.assertDictEqual(expected_feature_spec, feature_spec) -- GitLab From 78c50319875922185a762615a1e798d5f025e6b4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 10:38:16 -0700 Subject: [PATCH 267/294] Disable msan for python/kernel_tests:matrix_solve_ls_op_test PiperOrigin-RevId: 167598189 --- tensorflow/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index ed89a6dc48..107f5a6a98 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -528,6 +528,7 @@ tf_py_test( "//tensorflow/python:linalg_ops", "//tensorflow/python:math_ops", ], + tags = ["nomsan"], # fails in msan from numpy calls ) tf_py_test( -- GitLab From f841f34de31e04b3e997b0cb8f31a1286cdc7851 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 10:44:59 -0700 Subject: [PATCH 268/294] De-flake summary_interface_test - give unique name to each test. PiperOrigin-RevId: 167599317 --- tensorflow/core/kernels/summary_interface_test.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/summary_interface_test.cc b/tensorflow/core/kernels/summary_interface_test.cc index 66bde2cb06..0e24e8122a 100644 --- a/tensorflow/core/kernels/summary_interface_test.cc +++ b/tensorflow/core/kernels/summary_interface_test.cc @@ -32,6 +32,9 @@ Status SummaryTestHelper( const string& test_name, std::function writer_fn, std::function test_fn) { + static std::set* tests = new std::set(); + CHECK(tests->insert(test_name).second) << ": " << test_name; + SummaryWriterInterface* writer; Env* env = Env::Default(); TF_CHECK_OK( @@ -147,7 +150,7 @@ TEST(SummaryInterfaceTest, WriteImage) { TEST(SummaryInterfaceTest, WriteAudio) { TF_CHECK_OK(SummaryTestHelper( - "scalar_test", + "audio_test", [](SummaryWriterInterface* writer) { Tensor one(DT_FLOAT, TensorShape({1, 1})); one.scalar()() = 1.0; -- GitLab From 31ce478e277b4f68ac7e252e5feed853ea768fff Mon Sep 17 00:00:00 2001 From: lhlmgr Date: Tue, 5 Sep 2017 20:12:48 +0200 Subject: [PATCH 269/294] Update relaxed_onehot_categorical.py Fixed: default dtype in class documentation --- .../distributions/python/ops/relaxed_onehot_categorical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py index da1cd72a6f..699cf45a73 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py @@ -150,7 +150,7 @@ class ExpRelaxedOneHotCategorical(distribution.Distribution): `N - 1` dimensions index into a batch of independent distributions and the last dimension represents a vector of probabilities for each class. Only one of `logits` or `probs` should be passed in. - dtype: The type of the event samples (default: int32). + dtype: The type of the event samples (default: float32). validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect @@ -388,7 +388,7 @@ class RelaxedOneHotCategorical( dimensions index into a batch of independent distributions and the last dimension represents a vector of probabilities for each class. Only one of `logits` or `probs` should be passed in. - dtype: The type of the event samples (default: int32). + dtype: The type of the event samples (default: float32). validate_args: Unused in this distribution. allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any -- GitLab From fd9c3f65870718a52c3627569aafc460febaf488 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 11:14:44 -0700 Subject: [PATCH 270/294] Verify that predictions are in the expected range for ops that use thresholds, e.g. tf.contrib.metrics.streaming_auc. PiperOrigin-RevId: 167604306 --- .../contrib/metrics/python/ops/metric_ops_test.py | 9 +++++++++ tensorflow/python/ops/metrics_impl.py | 14 ++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index 00cde08bff..9b959b43a9 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -1496,6 +1496,15 @@ class StreamingAUCTest(test.TestCase): for _ in range(10): self.assertAlmostEqual(initial_auc, auc.eval(), 5) + def testPredictionsOutOfRange(self): + with self.test_session() as sess: + predictions = constant_op.constant( + [1, -1, 1, -1], shape=(1, 4), dtype=dtypes_lib.float32) + labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4)) + _, update_op = metrics.streaming_auc(predictions, labels) + sess.run(variables.local_variables_initializer()) + self.assertRaises(errors_impl.InvalidArgumentError, update_op.eval) + def testAllCorrect(self): self.allCorrectAsExpected('ROC') diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 3b0a357b16..16320f7584 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -463,10 +463,16 @@ def _confusion_matrix_at_thresholds( if include not in all_includes: raise ValueError('Invaild key: %s.' % include) - predictions, labels, weights = _remove_squeezable_dimensions( - predictions=math_ops.to_float(predictions), - labels=math_ops.cast(labels, dtype=dtypes.bool), - weights=weights) + with ops.control_dependencies([ + check_ops.assert_greater_equal( + predictions, 0.0, message='predictions must be in [0, 1]'), + check_ops.assert_less_equal( + predictions, 1.0, message='predictions must be in [0, 1]') + ]): + predictions, labels, weights = _remove_squeezable_dimensions( + predictions=math_ops.to_float(predictions), + labels=math_ops.cast(labels, dtype=dtypes.bool), + weights=weights) num_thresholds = len(thresholds) -- GitLab From 6e5b81ee6ff645a437f0e5caf199446f884fcd3c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 11:19:38 -0700 Subject: [PATCH 271/294] Compute correct size for sparse label tensor which might exceed the number of timesteps, preventing a fatal error (of invalid sparse indices). PiperOrigin-RevId: 167605086 --- tensorflow/core/kernels/ctc_loss_op.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/ctc_loss_op.cc b/tensorflow/core/kernels/ctc_loss_op.cc index a1f6001914..fb03adb7a5 100644 --- a/tensorflow/core/kernels/ctc_loss_op.cc +++ b/tensorflow/core/kernels/ctc_loss_op.cc @@ -91,7 +91,14 @@ class CTCLossOp : public OpKernel { OP_REQUIRES(ctx, batch_size != 0, errors::InvalidArgument("batch_size must not be 0")); - TensorShape labels_shape({batch_size, max_time}); + // Figure out the maximum label length to use as sparse tensor dimension. + auto labels_indices_t = labels_indices->matrix(); + int64 max_label_len = 0; + for (int i = 0; i < labels_indices->dim_size(0); i++) { + max_label_len = std::max(max_label_len, labels_indices_t(i, 1) + 1); + } + + TensorShape labels_shape({batch_size, max_label_len}); std::vector order{0, 1}; sparse::SparseTensor labels_sp(*labels_indices, *labels_values, labels_shape, order); -- GitLab From f72e5d81cc6aad606a0ab36664bba1180cb4dd76 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Tue, 5 Sep 2017 11:47:56 -0700 Subject: [PATCH 272/294] [XLA] Allow caller to inspect rematerialization before/after sizes. PiperOrigin-RevId: 167609460 --- .../xla/service/hlo_rematerialization.cc | 22 +++++++++++++------ .../xla/service/hlo_rematerialization.h | 16 +++++++++++--- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 20152cf0ce..6e5d7bca75 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -1202,7 +1202,7 @@ StatusOr HloRematerialization::RematerializeComputation( StatusOr HloRematerialization::Run( HloModule* module, SequentialHloOrdering::HloModuleSequence* sequence, - int64 memory_limit_bytes) { + int64 memory_limit_bytes, RematerializationSizes* sizes) { // The sequence is constructed entirely by this method. TF_RET_CHECK(sequence->empty()); @@ -1319,13 +1319,20 @@ StatusOr HloRematerialization::Run( << HumanReadableNumBytes(reduced_peak_memory) << " (" << reduced_peak_memory << " bytes)"; + if (sizes != nullptr) { + sizes->before_bytes = before_peak_memory; + sizes->after_bytes = current_peak_memory; + } + XLA_VLOG_LINES(3, "After HloRematerialization:\n" + module->ToString()); if (current_peak_memory > memory_limit_bytes) { - LOG(WARNING) << "Can't reduce memory use below " - << HumanReadableNumBytes(memory_limit_bytes) - << " by rematerialization (only reduced to " - << HumanReadableNumBytes(current_peak_memory) << ")"; + LOG(WARNING) << tensorflow::strings::Printf( + "Can't reduce memory use below %s (%lld bytes) by rematerialization; " + "only reduced to %s (%lld bytes)", + HumanReadableNumBytes(memory_limit_bytes).c_str(), memory_limit_bytes, + HumanReadableNumBytes(current_peak_memory).c_str(), + current_peak_memory); } return changed; @@ -1334,9 +1341,10 @@ StatusOr HloRematerialization::Run( /* static */ StatusOr HloRematerialization::RematerializeAndSchedule( const HloRematerialization::ShapeSizeFunction& size_function, int64 memory_limit_bytes, HloModule* hlo_module, - SequentialHloOrdering::HloModuleSequence* sequence) { + SequentialHloOrdering::HloModuleSequence* sequence, + RematerializationSizes* sizes) { HloRematerialization remat(size_function); - return remat.Run(hlo_module, sequence, memory_limit_bytes); + return remat.Run(hlo_module, sequence, memory_limit_bytes, sizes); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.h b/tensorflow/compiler/xla/service/hlo_rematerialization.h index 42c279d440..11f79a6d41 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.h +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.h @@ -28,6 +28,13 @@ class HloRematerialization { public: using ShapeSizeFunction = std::function; + // Helper struct that communicates the before / after sizes for the + // rematerialization process. + struct RematerializationSizes { + int64 before_bytes; + int64 after_bytes; + }; + // Rematerialize HLO instructions in the given module to reduce peak memory // use below memory_limit_bytes where memory use is defined as the total size // of all live HLO instruction values. Parameters and constants are included @@ -46,6 +53,9 @@ class HloRematerialization { // rematerialization. This is the order in which HLO instructions should // be emitted to minimize memory use. // + // sizes: Optional outparam that indicates the peak memory usage of the HLO + // module before/after rematerialization. + // // Returns whether any instructions were rematerialized. If memory use is // already below the given limit then no instructions are rematerialized and // false is returned. @@ -55,8 +65,8 @@ class HloRematerialization { // code generation. static StatusOr RematerializeAndSchedule( const ShapeSizeFunction& size_function, int64 memory_limit_bytes, - HloModule* hlo_module, - SequentialHloOrdering::HloModuleSequence* sequence); + HloModule* hlo_module, SequentialHloOrdering::HloModuleSequence* sequence, + RematerializationSizes* sizes = nullptr); protected: HloRematerialization(const ShapeSizeFunction& size_function) @@ -69,7 +79,7 @@ class HloRematerialization { // contains the memory-minimizing order in which to emit the HLO instructions. StatusOr Run(HloModule* module, SequentialHloOrdering::HloModuleSequence* sequence, - int64 memory_limit); + int64 memory_limit, RematerializationSizes* sizes); // Rematerializes instructions within the given computation. 'order' is the // order in which the computation's instructions will be emitted in the -- GitLab From f60e54b3df3886095ee2f04190e7c205e81ea38c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 12:03:41 -0700 Subject: [PATCH 273/294] Fix "wrong enum" comparison. PiperOrigin-RevId: 167611717 --- tensorflow/core/kernels/conv_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index 9de8642d41..bbb9e36fc9 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -529,7 +529,7 @@ void LaunchConv2DOp::operator()( const int64 out_depths = GetTensorDim(*output, data_format, 'C'); const int64 patch_rows = filter.dim_size(0); const int64 patch_cols = filter.dim_size(1); - if (padding == Eigen::PADDING_SAME) { + if (padding == SAME) { // Total padding on rows and cols is // Pr = (R' - 1) * S + Kr - R // Pc = (C' - 1) * S + Kc - C -- GitLab From f58b4ee5035f598d075fab56af410d2a57686d17 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Tue, 5 Sep 2017 12:26:32 -0700 Subject: [PATCH 274/294] Indicate after which pass the HloVerifier failed in the HLO pipeline. Example emitted error: InvalidArgumentError: Expected instruction to have shape compatible with f32[], actual shape is f32[42,1]: %divide.1 = f32[42,1]{1,0} divide(f32[] %divide, f32[] %constant) Failed after running pass: algebraic-simplifier PiperOrigin-RevId: 167614587 --- .../compiler/xla/service/hlo_pass_pipeline.cc | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc index eb3da111a2..7ad33c8947 100644 --- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc +++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" using ::tensorflow::strings::StrAppend; +using ::tensorflow::strings::StrCat; namespace xla { @@ -54,11 +55,18 @@ StatusOr HloPassPipeline::Run(HloModule* module) { << tensorflow::str_util::Join(disabled_passes, ", "); } - auto run_invariant_checkers = [this, module]() -> Status { + auto run_invariant_checkers = [this, + module](const string& message) -> Status { for (auto& invariant_checker : invariant_checkers_) { VLOG(1) << " Invariant checker " << invariant_checker->name(); - TF_ASSIGN_OR_RETURN(bool changed, invariant_checker->Run(module)); - TF_RET_CHECK(!changed) << "invariant checkers must not change the graph"; + StatusOr changed_status = invariant_checker->Run(module); + if (!changed_status.ok()) { + return Status(changed_status.status().code(), + StrCat(changed_status.status().error_message(), + "\n\nFailed ", message)); + } + TF_RET_CHECK(!changed_status.ValueOrDie()) + << "invariant checkers must not change the graph"; } return Status::OK(); }; @@ -66,6 +74,8 @@ StatusOr HloPassPipeline::Run(HloModule* module) { string prefix = name().ToString() + ": pipeline start"; bool changed = false; string message; + TF_RETURN_IF_ERROR( + run_invariant_checkers(StrCat("before running pipeline: ", name()))); for (auto& pass : passes_) { if (disabled_passes.count(pass->name().ToString()) > 0) { VLOG(1) << " Skipping HLO pass " << pass->name() @@ -80,14 +90,14 @@ StatusOr HloPassPipeline::Run(HloModule* module) { StrAppend(&message, prefix, ", before ", pass->name()); DumpModule(*module, message); - TF_RETURN_IF_ERROR(run_invariant_checkers()); TF_ASSIGN_OR_RETURN(bool changed_this_pass, pass->Run(module)); + TF_RETURN_IF_ERROR( + run_invariant_checkers(StrCat("after running pass: ", pass->name()))); changed |= changed_this_pass; prefix.clear(); StrAppend(&prefix, name(), ": after ", pass->name()); } - TF_RETURN_IF_ERROR(run_invariant_checkers()); DumpModule(*module, prefix + ", pipeline end"); return changed; } -- GitLab From bc4d16221c3c713e2381e63435be1f30bccecd2f Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Tue, 5 Sep 2017 12:27:03 -0700 Subject: [PATCH 275/294] Increase test size in BUILD due to test timeout PiperOrigin-RevId: 167614646 --- tensorflow/python/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 107f5a6a98..43827b0d10 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -518,7 +518,7 @@ tf_py_test( tf_py_test( name = "matrix_solve_ls_op_test", - size = "small", + size = "medium", srcs = ["matrix_solve_ls_op_test.py"], additional_deps = [ "//third_party/py/numpy", -- GitLab From 25a461e888b2cccb2fc996fb5b3186f01f62a12b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 12:40:03 -0700 Subject: [PATCH 276/294] Fix a missed variable watch. PiperOrigin-RevId: 167616202 --- tensorflow/python/ops/resource_variable_ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 1471b5909e..2cae16f44c 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -505,6 +505,8 @@ class ResourceVariable(variables.Variable): def sparse_read(self, indices, name=None): """Reads the value of this variable sparsely, using `gather`.""" with ops.name_scope("Gather" if name is None else name) as name: + if self._trainable: + tape.watch(self._handle) value = resource_gather( self._handle, indices, dtype=self._dtype, name=name) return array_ops.identity(value) -- GitLab From 8ea10382c46de6f1e808ebeb1f80fc7754066c78 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 13:37:42 -0700 Subject: [PATCH 277/294] Speed up ExpectTensorEqual and Near in non-opt mode by using access to the raw buffer instead of through eigen accessor. PiperOrigin-RevId: 167624109 --- tensorflow/core/framework/tensor_testutil.h | 27 ++++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/framework/tensor_testutil.h b/tensorflow/core/framework/tensor_testutil.h index ab224aa718..4c216a84f0 100644 --- a/tensorflow/core/framework/tensor_testutil.h +++ b/tensorflow/core/framework/tensor_testutil.h @@ -166,10 +166,11 @@ struct Expector { static void Equal(const Tensor& x, const Tensor& y) { ASSERT_EQ(x.dtype(), DataTypeToEnum::v()); AssertSameTypeDims(x, y); - auto a = x.flat(); - auto b = y.flat(); - for (int i = 0; i < a.size(); ++i) { - ExpectEqual(a(i), b(i)); + const auto size = x.NumElements(); + const T* a = x.flat().data(); + const T* b = y.flat().data(); + for (int i = 0; i < size; ++i) { + ExpectEqual(a[i], b[i]); } } }; @@ -182,10 +183,11 @@ struct Expector { static void Equal(const Tensor& x, const Tensor& y) { ASSERT_EQ(x.dtype(), DataTypeToEnum::v()); AssertSameTypeDims(x, y); - auto a = x.flat(); - auto b = y.flat(); - for (int i = 0; i < a.size(); ++i) { - ExpectEqual(a(i), b(i)); + const auto size = x.NumElements(); + const T* a = x.flat().data(); + const T* b = y.flat().data(); + for (int i = 0; i < size; ++i) { + ExpectEqual(a[i], b[i]); } } @@ -199,10 +201,11 @@ struct Expector { static void Near(const Tensor& x, const Tensor& y, const double abs_err) { ASSERT_EQ(x.dtype(), DataTypeToEnum::v()); AssertSameTypeDims(x, y); - auto a = x.flat(); - auto b = y.flat(); - for (int i = 0; i < a.size(); ++i) { - Near(a(i), b(i), abs_err, i); + const auto size = x.NumElements(); + const T* a = x.flat().data(); + const T* b = y.flat().data(); + for (int i = 0; i < size; ++i) { + Near(a[i], b[i], abs_err, i); } } }; -- GitLab From 0a7958a8144bb3109977ac234555992c458b91fe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 14:09:50 -0700 Subject: [PATCH 278/294] [XLA] Also inject reduce-precision instructions inside kInput and kOutput fusion nodes. The kInput fusion node type has elementwise operations fused into the input side, and the kOutput fusion node type has elementwise operations fused onto the output side. Thus, we can include those along with kLoop fusion nodes in the reduce-precision-insertion-pass code path that injects the instructions inside the fusion node. PiperOrigin-RevId: 167628771 --- .../compiler/xla/service/reduce_precision_insertion.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc index 33327dc60f..8275531111 100644 --- a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc +++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc @@ -123,7 +123,8 @@ StatusOr ReducePrecisionInsertion::insert_on_inputs( } if (instruction->opcode() == HloOpcode::kFusion && - instruction->fusion_kind() == HloInstruction::FusionKind::kLoop) { + (instruction->fusion_kind() == HloInstruction::FusionKind::kLoop || + instruction->fusion_kind() == HloInstruction::FusionKind::kInput)) { // Insert the reduce-precision operation inside the fusion computation, // after the corresponding parameter instruction. TF_ASSIGN_OR_RETURN( @@ -173,7 +174,8 @@ StatusOr ReducePrecisionInsertion::insert_on_outputs( } if (instruction->opcode() == HloOpcode::kFusion && - instruction->fusion_kind() == HloInstruction::FusionKind::kLoop) { + (instruction->fusion_kind() == HloInstruction::FusionKind::kLoop || + instruction->fusion_kind() == HloInstruction::FusionKind::kOutput)) { // Insert the reduce-precision operation as the last operation inside // the fusion computation. HloInstruction* fusion_root = instruction->fused_expression_root(); -- GitLab From fdac5a54a9452875f53d4ab1959e46f8f7390f95 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 14:11:13 -0700 Subject: [PATCH 279/294] Log the test case name in tensorflow test case setup and teardown. PiperOrigin-RevId: 167629019 --- tensorflow/python/framework/test_util.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 73b7f821c8..04c7554a58 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -392,6 +392,7 @@ class TensorFlowTestCase(googletest.TestCase): self._cached_session = None def setUp(self): + logging.info("SET UP: %s" % str(self)) self._ClearCachedSession() random.seed(random_seed.DEFAULT_GRAPH_SEED) np.random.seed(random_seed.DEFAULT_GRAPH_SEED) @@ -406,6 +407,7 @@ class TensorFlowTestCase(googletest.TestCase): ops.get_default_graph().seed = random_seed.DEFAULT_GRAPH_SEED def tearDown(self): + logging.info("TEAR DOWN: %s" % str(self)) for thread in self._threads: self.assertFalse(thread.is_alive(), "A checkedThread did not terminate") -- GitLab From 9550f34249ec4b3c5f82d72877cfdca69b81f454 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 5 Sep 2017 14:36:28 -0700 Subject: [PATCH 280/294] Disable server_lib_test on windows. PiperOrigin-RevId: 167632798 --- tensorflow/contrib/cmake/tf_tests.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 15850bf0a4..eb02f20457 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -240,6 +240,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/training/quantize_training_test.py" # Needs quantization ops to be included in windows. "${tensorflow_source_dir}/tensorflow/python/training/supervisor_test.py" # Flaky I/O error on rename. "${tensorflow_source_dir}/tensorflow/python/training/sync_replicas_optimizer_test.py" # Needs portpicker. + "${tensorflow_source_dir}/tensorflow/python/training/server_lib_test.py" # Test occasionally deadlocks. + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/array_ops_test.py" # depends on python/framework/test_ops # Broken tensorboard test due to cmake issues. "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py" -- GitLab From e4886b1ea2c46d70db67bd8105c6c430c09305c9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Sep 2017 14:37:08 -0700 Subject: [PATCH 281/294] Update bazel-toolchains repo to use Bazel 0.5.4 toolchain configs. PiperOrigin-RevId: 167632896 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 646459646d..e5342cba77 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -699,9 +699,9 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.http_archive( name = "bazel_toolchains", urls = [ - "http://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/bccee4855c049d34bac481083b4c68e2fab8cc50.tar.gz", - "https://github.com/bazelbuild/bazel-toolchains/archive/bccee4855c049d34bac481083b4c68e2fab8cc50.tar.gz", + "http://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/9dbd803ad3b9447430a296810197b09b3a710956.tar.gz", + "https://github.com/bazelbuild/bazel-toolchains/archive/9dbd803ad3b9447430a296810197b09b3a710956.tar.gz", ], - sha256 = "3903fd93b96b42067e00b7973a2c16c34e761ad7a0b55e1557d408f352849e41", - strip_prefix = "bazel-toolchains-bccee4855c049d34bac481083b4c68e2fab8cc50", + sha256 = "0799aa12db5260a499beb40f81744e760c59d055bfc5d271dd2c2ed4d5419faa", + strip_prefix = "bazel-toolchains-9dbd803ad3b9447430a296810197b09b3a710956", ) -- GitLab From cedf33672f4006b8e63258c82e326888de000b6e Mon Sep 17 00:00:00 2001 From: Nicolas Lopez Date: Tue, 5 Sep 2017 17:52:49 -0400 Subject: [PATCH 282/294] adding missing dep on jdk for gen_ops.bzl (#12827) --- tensorflow/java/src/gen/gen_ops.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl index c09503ce8a..e0d5556122 100644 --- a/tensorflow/java/src/gen/gen_ops.bzl +++ b/tensorflow/java/src/gen/gen_ops.bzl @@ -53,7 +53,7 @@ def tf_java_op_gen_srcjar(name, native.genrule( name=name, - srcs=["@local_jdk//:jar"], + srcs=["@local_jdk//:jar"] + ["@local_jdk//:jdk"], outs=[gen_srcjar], tools=gen_tools, cmd='&&'.join(gen_cmds)) -- GitLab From 6f8f7e958d8ec28eb707ca8e3b293785113af31b Mon Sep 17 00:00:00 2001 From: resec Date: Wed, 6 Sep 2017 10:05:39 +0800 Subject: [PATCH 283/294] Add SONAME for Makefile-built libtensorflow_inference.so --- tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in b/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in index 631d52235a..26c1ad4947 100644 --- a/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in +++ b/tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in @@ -52,7 +52,9 @@ $(INFERENCE_SO_PATH): $(LIB_OBJS) $(INFERENCE_OBJS) @mkdir -p $(dir $@) $(CXX) $(CXXFLAGS) $(INCLUDES) \ -o $@ $(INFERENCE_OBJS) $(LIB_OBJS) \ - $(LIBFLAGS) $(LDFLAGS) -shared $(LIBS) + $(LIBFLAGS) $(LDFLAGS) \ + -shared -Wl,-soname,$(INFERENCE_SO_NAME) \ + $(LIBS) $(INFERENCE_SO_NAME): $(INFERENCE_SO_PATH) -- GitLab From ff213006623982582dc3be5f125ec33b01eb4d59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 6 Sep 2017 13:44:10 +0800 Subject: [PATCH 284/294] BLD: precheck patch --- tensorflow/workspace.bzl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 176719fabb..80f1b0a959 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -99,6 +99,9 @@ def _execute_and_check_ret_code(repo_ctx, cmd_and_args): # Apply a patch_file to the repository root directory # Runs 'patch -p1' def _apply_patch(repo_ctx, patch_file): + if not repo_ctx.which("patch"): + fail("patch command is not found, please install it") + cmd = [ "patch", "-p1", "-d", repo_ctx.path("."), "-i", repo_ctx.path(patch_file) ] -- GitLab From 6d05bd60366e249e1c36424262d46f5b9b9c6e44 Mon Sep 17 00:00:00 2001 From: Yun Peng Date: Wed, 6 Sep 2017 13:01:43 +0200 Subject: [PATCH 285/294] Make saver_test pass on Windows Bazel Build --- tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 3 +++ tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 3 +++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 61f5ed084c..f6e3d2e6c7 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -60,8 +60,11 @@ reinstall_tensorflow_pip ${PIP_NAME} # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore, # which will result testing system installed tensorflow +# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after +# https://github.com/tensorflow/tensorflow/issues/12844 is fixed. bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ --test_tag_filters=-no_pip,-no_windows \ --build_tag_filters=-no_pip,-no_windows --build_tests_only \ + --test_env=TF_SAVER_LENIENT_NAMES=True \ //${PY_TEST_DIR}/tensorflow/python/... diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index e1972a3100..25d327c818 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -61,8 +61,11 @@ reinstall_tensorflow_pip ${PIP_NAME} # Define no_tensorflow_py_deps=true so that every py_test has no deps anymore, # which will result testing system installed tensorflow # GPU tests are very flaky when running concurrently, so set local_test_jobs=1 +# TODO(pcloudy): Remove TF_SAVER_LENIENT_NAMES after +# https://github.com/tensorflow/tensorflow/issues/12844 is fixed. bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ --test_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \ --build_tag_filters=-no_pip,-no_windows,-no_windows_gpu,-no_gpu,-no_pip_gpu \ + --test_env=TF_SAVER_LENIENT_NAMES=True \ --local_test_jobs=1 --build_tests_only //${PY_TEST_DIR}/tensorflow/python/... -- GitLab From 4a24db28868e3ec9c9bc7408b9f72862116d6d39 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Wed, 6 Sep 2017 09:49:14 -0400 Subject: [PATCH 286/294] Exported eager API TFE_* symbols in the dynamic library (#12785) * Exported TFE_* symbols in the dynamic library. * Exported the eager API symbols for Windows too. * Added missing macros. --- tensorflow/c/eager/c_api.h | 136 +++++++++++++++++------------ tensorflow/c/version_script.lds | 1 + tensorflow/tf_exported_symbols.lds | 1 + tensorflow/tf_version_script.lds | 1 + 4 files changed, 81 insertions(+), 58 deletions(-) diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 88a0dd343f..a54d206a30 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -20,6 +20,25 @@ limitations under the License. #include "tensorflow/c/c_api.h" +// Macro to control visibility of exported symbols in the shared library (.so, +// .dylib, .dll). +// This duplicates the TF_EXPORT macro definition in +// tensorflow/core/platform/macros.h in order to keep this .h file independent +// of any other includes.$a +#ifdef SWIG +#define TF_CAPI_EXPORT +#else +#if defined(COMPILER_MSVC) +#ifdef TF_COMPILE_LIBRARY +#define TF_CAPI_EXPORT __declspec(dllexport) +#else +#define TF_CAPI_EXPORT __declspec(dllimport) +#endif // TF_COMPILE_LIBRARY +#else +#define TF_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // COMPILER_MSVC +#endif // SWIG + #ifdef __cplusplus extern "C" { #endif @@ -30,11 +49,11 @@ extern "C" { // TODO(ashankar): Merge with TF_Session? typedef struct TFE_Context TFE_Context; -extern TFE_Context* TFE_NewContext(const TF_SessionOptions* opts, - TF_Status* status); -extern void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status); -extern TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, - TF_Status* status); +TF_CAPI_EXPORT extern TFE_Context* TFE_NewContext(const TF_SessionOptions* opts, + TF_Status* status); +TF_CAPI_EXPORT extern void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status); +TF_CAPI_EXPORT extern TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, + TF_Status* status); // A handle to a tensor on a device. // @@ -43,14 +62,15 @@ extern TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, // placed in memory of different devices or remote address spaces. typedef struct TFE_TensorHandle TFE_TensorHandle; -extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status); -extern void TFE_DeleteTensorHandle(TFE_TensorHandle* h); -extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h); -extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h); -extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index); -extern const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h); -extern TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, - TF_Status* status); +TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, + TF_Status* status); +TF_CAPI_EXPORT extern void TFE_DeleteTensorHandle(TFE_TensorHandle* h); +TF_CAPI_EXPORT extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h); +TF_CAPI_EXPORT extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h); +TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index); +TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h); +TF_CAPI_EXPORT extern TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, + TF_Status* status); // Create a new TFE_TensorHandle with the same contents as 'h' but placed // in the memory of the device name 'device_name'. @@ -58,10 +78,10 @@ extern TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, // that shares the underlying buffer. Otherwise, it currently requires at least // one of the source or destination devices to be CPU (i.e., for the source or // destination tensor to be placed in host memory). -extern TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, - TFE_Context* ctx, - const char* device_name, - TF_Status* status); +TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, + TFE_Context* ctx, + const char* device_name, + TF_Status* status); // Description of the TensorFlow op to execute. // @@ -76,49 +96,49 @@ extern TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, // the additional sanity checks there seem unnecessary; typedef struct TFE_Op TFE_Op; -extern TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, - TF_Status* status); -extern void TFE_DeleteOp(TFE_Op* op); +TF_CAPI_EXPORT extern TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, + TF_Status* status); +TF_CAPI_EXPORT extern void TFE_DeleteOp(TFE_Op* op); // TODO(ashankar): TFE_OpSetDevice and TFE_Execute should not have a TFE_Context // parameter. Instead, the TFE_Context should be captured when creating the // TFE_Op. -extern void TFE_OpSetDevice(TFE_Op* op, TFE_Context* ctx, - const char* device_name, TF_Status* status); - -extern void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status); - -extern TF_AttrType TFE_OpGetAttrType(TFE_Op* op, const char* attr_name, - unsigned char* is_list, TF_Status* status); - -extern void TFE_OpSetAttrString(TFE_Op* op, const char* attr_name, - const char* value); -extern void TFE_OpSetAttrInt(TFE_Op* op, const char* attr_name, int64_t value); -extern void TFE_OpSetAttrFloat(TFE_Op* op, const char* attr_name, float value); -extern void TFE_OpSetAttrBool(TFE_Op* op, const char* attr_name, - unsigned char value); -extern void TFE_OpSetAttrType(TFE_Op* op, const char* attr_name, - TF_DataType value); +TF_CAPI_EXPORT extern void TFE_OpSetDevice(TFE_Op* op, TFE_Context* ctx, + const char* device_name, TF_Status* status); + +TF_CAPI_EXPORT extern void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status); + +TF_CAPI_EXPORT extern TF_AttrType TFE_OpGetAttrType(TFE_Op* op, const char* attr_name, + unsigned char* is_list, TF_Status* status); + +TF_CAPI_EXPORT extern void TFE_OpSetAttrString(TFE_Op* op, const char* attr_name, + const char* value); +TF_CAPI_EXPORT extern void TFE_OpSetAttrInt(TFE_Op* op, const char* attr_name, int64_t value); +TF_CAPI_EXPORT extern void TFE_OpSetAttrFloat(TFE_Op* op, const char* attr_name, float value); +TF_CAPI_EXPORT extern void TFE_OpSetAttrBool(TFE_Op* op, const char* attr_name, + unsigned char value); +TF_CAPI_EXPORT extern void TFE_OpSetAttrType(TFE_Op* op, const char* attr_name, + TF_DataType value); // If the number of dimensions is unknown, `num_dims` must be set to // -1 and `dims` can be null. If a dimension is unknown, the // corresponding entry in the `dims` array must be -1. -extern void TFE_OpSetAttrShape(TFE_Op* op, const char* attr_name, - const int64_t* dims, const int num_dims, - TF_Status* out_status); - -extern void TFE_OpSetAttrStringList(TFE_Op* op, const char* attr_name, - const char** value, int num_values); -extern void TFE_OpSetAttrIntList(TFE_Op* op, const char* attr_name, - const int64_t* values, int num_values); -extern void TFE_OpSetAttrFloatList(TFE_Op* op, const char* attr_name, - const float* values, int num_values); -extern void TFE_OpSetAttrBoolList(TFE_Op* op, const char* attr_name, - const unsigned char* values, int num_values); -extern void TFE_OpSetAttrTypeList(TFE_Op* op, const char* attr_name, - const TF_DataType* values, int num_values); -extern void TFE_OpSetAttrShapeList(TFE_Op* op, const char* attr_name, - const int64_t** dims, const int* num_dims, - int num_values, TF_Status* out_status); +TF_CAPI_EXPORT extern void TFE_OpSetAttrShape(TFE_Op* op, const char* attr_name, + const int64_t* dims, const int num_dims, + TF_Status* out_status); + +TF_CAPI_EXPORT extern void TFE_OpSetAttrStringList(TFE_Op* op, const char* attr_name, + const char** value, int num_values); +TF_CAPI_EXPORT extern void TFE_OpSetAttrIntList(TFE_Op* op, const char* attr_name, + const int64_t* values, int num_values); +TF_CAPI_EXPORT extern void TFE_OpSetAttrFloatList(TFE_Op* op, const char* attr_name, + const float* values, int num_values); +TF_CAPI_EXPORT extern void TFE_OpSetAttrBoolList(TFE_Op* op, const char* attr_name, + const unsigned char* values, int num_values); +TF_CAPI_EXPORT extern void TFE_OpSetAttrTypeList(TFE_Op* op, const char* attr_name, + const TF_DataType* values, int num_values); +TF_CAPI_EXPORT extern void TFE_OpSetAttrShapeList(TFE_Op* op, const char* attr_name, + const int64_t** dims, const int* num_dims, + int num_values, TF_Status* out_status); // Execute the operation defined by 'op' and return handles to computed // tensors in 'retvals'. @@ -128,14 +148,14 @@ extern void TFE_OpSetAttrShapeList(TFE_Op* op, const char* attr_name, // // On return, 'num_retvals' will be set to the actual number of outputs // returned by the operation. -extern void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, - int* num_retvals, TF_Status* status); +TF_CAPI_EXPORT extern void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, + int* num_retvals, TF_Status* status); // Add a function (serialized FunctionDef protocol buffer) to ctx so // that it can be invoked using TFE_Execute. -extern void TFE_ContextAddFunctionDef(TFE_Context* ctx, - const char* serialized_function_def, - size_t size, TF_Status* status); +TF_CAPI_EXPORT extern void TFE_ContextAddFunctionDef(TFE_Context* ctx, + const char* serialized_function_def, + size_t size, TF_Status* status); #ifdef __cplusplus } /* end extern "C" */ diff --git a/tensorflow/c/version_script.lds b/tensorflow/c/version_script.lds index 455bd7362b..5855782003 100644 --- a/tensorflow/c/version_script.lds +++ b/tensorflow/c/version_script.lds @@ -2,6 +2,7 @@ VERS_1.0 { # Export symbols in c_api.h. global: TF_*; + TFE_*; # Hide everything else. local: diff --git a/tensorflow/tf_exported_symbols.lds b/tensorflow/tf_exported_symbols.lds index 4597d929a1..850f0edd94 100644 --- a/tensorflow/tf_exported_symbols.lds +++ b/tensorflow/tf_exported_symbols.lds @@ -2,4 +2,5 @@ *perftools*gputools* *tf_* TF_* +TFE_* *nsync_* diff --git a/tensorflow/tf_version_script.lds b/tensorflow/tf_version_script.lds index 88b64eb1f0..73d4c0cae4 100644 --- a/tensorflow/tf_version_script.lds +++ b/tensorflow/tf_version_script.lds @@ -3,6 +3,7 @@ tensorflow { *tensorflow*; *perftools*gputools*; TF_*; + TFE_*; *nsync_*; local: *; -- GitLab From e24ff272716382c42e83ab9b1eaccfe61c64935a Mon Sep 17 00:00:00 2001 From: Kenichi Ueno Date: Thu, 7 Sep 2017 08:35:52 +0900 Subject: [PATCH 287/294] Fix a path in README.md --- tensorflow/examples/ios/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/ios/README.md b/tensorflow/examples/ios/README.md index 7974b8c879..7d2eb870be 100644 --- a/tensorflow/examples/ios/README.md +++ b/tensorflow/examples/ios/README.md @@ -30,7 +30,7 @@ cp ~/graphs/inception5h/* tensorflow/examples/ios/simple/data/ long time since it is big (~450MB). For example, if you want to run the simple example, then: ```bash -cd tensorflow/ios/simple +cd tensorflow/examples/ios/simple pod install open tf_simple_example.xcworkspace # obs, not the .xcodeproj directory ``` -- GitLab From 75e926041a0fdef0f3c2f6a7fbee4da50ea22a1e Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Wed, 6 Sep 2017 16:38:08 -0700 Subject: [PATCH 288/294] Update bib.md Made correct explicit HTTPS call --- tensorflow/docs_src/about/bib.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/about/bib.md b/tensorflow/docs_src/about/bib.md index 0c0e88c1fe..c9f0c532c6 100644 --- a/tensorflow/docs_src/about/bib.md +++ b/tensorflow/docs_src/about/bib.md @@ -37,7 +37,7 @@ system, we suggest you cite this whitepaper.
 @misc{tensorflow2015-whitepaper,
 title={ {TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems},
-url={http://tensorflow.org/},
+url={https://www.tensorflow.org/},
 note={Software available from tensorflow.org},
 author={
     Mart\'{\i}n~Abadi and
-- 
GitLab


From fbff627a7a3b2fa57512fe6b3850fe0febb6315b Mon Sep 17 00:00:00 2001
From: Alan Yee 
Date: Wed, 6 Sep 2017 16:43:50 -0700
Subject: [PATCH 289/294] Update train.py

Made explicit HTTPS call
---
 tensorflow/examples/speech_commands/train.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/examples/speech_commands/train.py b/tensorflow/examples/speech_commands/train.py
index 8298a90b44..c92c38b23c 100644
--- a/tensorflow/examples/speech_commands/train.py
+++ b/tensorflow/examples/speech_commands/train.py
@@ -15,9 +15,10 @@
 r"""Simple speech recognition to spot a limited number of keywords.
 
 This is a self-contained example script that will train a very basic audio
-recognition model in TensorFlow. It can download the necessary training data,
-and runs with reasonable defaults to train within a few hours even only using a
-CPU. For more information see http://tensorflow.org/tutorials/audio_recognition.
+recognition model in TensorFlow. It downloads the necessary training data and
+runs with reasonable defaults to train within a few hours even only using a CPU.
+For more information, please see
+https://www.tensorflow.org/tutorials/audio_recognition.
 
 It is intended as an introduction to using neural networks for audio
 recognition, and is not a full speech recognition system. For more advanced
-- 
GitLab


From acaaddab6f55d6bc344747771709aadb4c0f14e2 Mon Sep 17 00:00:00 2001
From: Alan Yee 
Date: Wed, 6 Sep 2017 16:44:42 -0700
Subject: [PATCH 290/294] Update setup.py

Made correct explicit HTTPS call
---
 tensorflow/tools/pip_package/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index b009859e41..2b0d24d9a8 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -193,7 +193,7 @@ setup(
     version=_VERSION.replace('-', ''),
     description='TensorFlow helps the tensors flow',
     long_description='',
-    url='http://tensorflow.org/',
+    url='https://www.tensorflow.org/',
     author='Google Inc.',
     author_email='opensource@google.com',
     # Contained modules and scripts.
-- 
GitLab


From 18f36927160d05b941c056f10dc7f9aecaa05e23 Mon Sep 17 00:00:00 2001
From: Yifei Feng 
Date: Wed, 6 Sep 2017 23:52:56 -0700
Subject: [PATCH 291/294] Branch 167812735 (#12867)

* Internal cleanup

PiperOrigin-RevId: 167636242

* Move the Keras API to tf.keras.

PiperOrigin-RevId: 167638421

* Automated g4 rollback of changelist 167604306

PiperOrigin-RevId: 167639833

* Call HloComputation.Accept instead of HloInstruction.Accept to get all instructions profiled.

RELNOTES: n/a
PiperOrigin-RevId: 167640259

* Add fast math attributes to all generated methods when fast math enabled.

RELNOTES: n/a
PiperOrigin-RevId: 167646637

* Extended ScratchSpace to expose its underlying scratch tensor object.

PiperOrigin-RevId: 167649551

* Change zip(...)[1] to list(zip(...))[1], for python 3 compatibility.

PiperOrigin-RevId: 167654035

* Add scoped timer to log jit compile times.

RELNOTES: n/a
PiperOrigin-RevId: 167656720

* Verify that predictions are in the expected range for ops that use thresholds, e.g. tf.contrib.metrics.streaming_auc.

PiperOrigin-RevId: 167658134

* Internal change.

PiperOrigin-RevId: 167658401

* Fix list formatting

PiperOrigin-RevId: 167660250

* Enable java test.

PiperOrigin-RevId: 167660276

* Add shape functions on debug ops.

PiperOrigin-RevId: 167668811

* Increase session_bundle_test to a medium test.

PiperOrigin-RevId: 167672587

* Include layout of convolution input data in the op_profile.

PiperOrigin-RevId: 167680208

* Fix tf.sparse_add for SparseTensor with _ref typed values.

Example:
st = tf.SparseTensor(
    indices=[[1]], values=tf.Variable([1.0]), dense_shape=[1])
tf.sparse_add(st, st)
PiperOrigin-RevId: 167681121

* Fix conversion to explicit scalar broadcast

The dimensions field of a broadcast HLO op is meant to be populated with the
dimensions that are broadcasted, which in case of a scalar is the empty vector.
Generally, the rank of the operand of a broadcast op should always equal the
size of the dimensions vector.

PiperOrigin-RevId: 167686946

* Add 'unknown shape' shape functions on deprecated linalg ops.

PiperOrigin-RevId: 167719029

* Be more careful in IsInitalized, and log when it is called on an unknown
node_id.

PiperOrigin-RevId: 167722344

* tfdbg: Refactor graph-processing code out of debug_data.py

The basic idea is to separate the code in debug_data.py that handles graph structures into its own module (debug_graphs.py). This tackles an existing TODO item to simplify the code debug_data.DebugDumpDir.

In a later CL, code will be added to debug_graphs.DebugGraph to allow reconstruction of the original GraphDef, i.e., the GraphDef without the Copy* and Debug* nodes inserted by tfdbg. This will be useful for, among other things, the TensorBoard Debugger Plugin.

PiperOrigin-RevId: 167726113

* internal

PiperOrigin-RevId: 167727508

* Update MaxPoolV2Shape to support NCHV_VECT_C.

PiperOrigin-RevId: 167732437

* Deleting tf.contrib.learn.dnn benchmark tests.

PiperOrigin-RevId: 167741308

* Fix off-by-one documentation error.

sequence_lengths is the actual length of the sequence and therefor should not be used as zero-based indexing.
The code is correct but the documentation was misleading.

PiperOrigin-RevId: 167742082

* contrib summaries work in eager-graph mode (with defun)

As a side effect fix issues related to using eager-defined variables in graph
mode.

PiperOrigin-RevId: 167744121

* Fix minor documentation error in ZlibInputStream.

PiperOrigin-RevId: 167745218

* Sets the distributed training related properties of RunConfig based on TF_CONFIG.

PiperOrigin-RevId: 167752997

* Improved documentation about eval ops in EstimatorSpec.

PiperOrigin-RevId: 167753099

* Automated g4 rollback of changelist 156748870

PiperOrigin-RevId: 167753805

* Make cuda_solvers_gpu.cu.cc compile with nvcc8.

PiperOrigin-RevId: 167754383

* Add csv dataset example to get_started/regression.

PiperOrigin-RevId: 167754634

* Switches to OrderedDict to make the dictionary order deterministic so we have less randomness from graph building.

PiperOrigin-RevId: 167755072

* Add int8 version of fused_conv2d_bias_activation operator for the forward phase,
and support side_input and scaling parameters in float and int8 versions.

PiperOrigin-RevId: 167763219

* Make the text summary write no plugin data content

This is actually a safe removal because no logic makes use of the content of text plugin data.

PiperOrigin-RevId: 167763880

* Avoid unnecessary buffer allocations & deallocations

Before this change, when we reached the end of a file, we would
 (1) clear the existing buffer (which at large buffer sizes typically involved
     deallocating it).
 (2) reserve a buffer (which at large buffer sizes is non-trivial)
 (3) realize we had reached EoF, and therefore clear the buffer, deallocating
     it again.

With this change, whenever the buffered reader detects an EoF condition, we
remember it, so that we can short-circuit the above logic.

The above optimization results in a more than 25x performance improvement for
large buffers reading small files.

PiperOrigin-RevId: 167766751

* [TF:XLA] In Literal: correctly handle operands with zero elements in
Copy.

PiperOrigin-RevId: 167769308

* Reduce batch size for resampler backward pass test, to speed up test.

PiperOrigin-RevId: 167769539

* Remove `SimpleGraphExecutionState::costs_`, which is unused.

PiperOrigin-RevId: 167772120

* detecting cycles when users add a control edge to a graph

PiperOrigin-RevId: 167773598

* Make writer_test avoid setting content to a string

That content field of the PluginData proto is going to be converted into a bytes field, and setting it to a string makes the test fail. Furthermore, the purpose of this test is to make sure that correct data is written, so setting the name of the plugin suffices.

PiperOrigin-RevId: 167776457

* Propagate the original stack trace when exceptions caught be MonitoredSession
are re-raised.

PiperOrigin-RevId: 167781071

* Change trace.py to not access a graph as a default argument.

Checks for None and access via default graph inside the function.

PiperOrigin-RevId: 167788815

* Added custom metric support for tf.estimator.Estimator.

PiperOrigin-RevId: 167788891

* A eager Saver that allows restore on create.

PiperOrigin-RevId: 167789332

* Make content field of PluginData a bytes field

The content field had previously been a string field, which had been problematic because
string fields can only store UTF-8 strings.

This problem can manifest in various ways. For instance, take the precision-recall curve plugin. Its summary collects data that scales in size based on the number of thresholds. When the content field is a string, the summary logic serializes the relevant data proto just fine when we only have a few thresholds (about 100). However, for large numbers of thresholds (ie, around 200), the summary logic fails to serialize and throws a cryptic error.

ValueError: '\x10\xc8\x01' has type str, but isn't valid UTF-8 encoding. Non-UTF-8 strings must be converted to unicode objects before being added.

Changing the content field to a bytes field fixes this issue because bytes fields are not restricted to UTF-8 strings. I just happened to have needed a long enough string for the string to no longer be a valid UTF-8 one.

PiperOrigin-RevId: 167790594

* Temporarily disable tf_should_use wrapper, since it can cause
python Graph/Operation/Tensor memory leaks.

PiperOrigin-RevId: 167790657

* Ensure using "path" as a URI will keep working.

PiperOrigin-RevId: 167793848

* Fix typo in graph transforms error message

PiperOrigin-RevId: 167796563

* Merge changes from github.
END_PUBLIC

---
Commit 607816029 authored by Eugene Brevdo
Committed by TensorFlower Gardener:
Extended ScratchSpace to expose its underlying scratch tensor object.

PiperOrigin-RevId: 167649551

---
Commit db43fe68e authored by A. Unique TensorFlower
Committed by TensorFlower Gardener:
Add fast math attributes to all generated methods when fast math enabled.

RELNOTES: n/a
PiperOrigin-RevId: 167646637

---
Commit aebe8cc6f authored by A. Unique TensorFlower
Committed by TensorFlower Gardener:
Call HloComputation.Accept instead of HloInstruction.Accept to get all instructions profiled.

RELNOTES: n/a
PiperOrigin-RevId: 167640259

---
Commit 0ab137cd8 authored by A. Unique TensorFlower
Committed by TensorFlower Gardener:
BEGIN_PUBLIC
Automated g4 rollback of changelist 167604306

PiperOrigin-RevId: 167800256

* Update ops-related pbtxt files.

PiperOrigin-RevId: 167802521

* Go: Update generated wrapper functions for TensorFlow ops.

PiperOrigin-RevId: 167804076

* Add sloppy_interleave dataset operator.

When feeding data at high speed into a model from variable-latency data
sources, head-of-line blocking can be a significant concern when using a
deterministic input pipeline, such as interleave.

This change introduces a new non-deterministic dataset operator that avoids
head-of-line blocking.

PiperOrigin-RevId: 167810743

* Update ops-related pbtxt files.

PiperOrigin-RevId: 167811375

* tfdbg: Fix python3 breakage in grpc debug tests caused by bytes-type plugin_data content

PiperOrigin-RevId: 167812508

* [XLA] Rip CheckFusionNode() out of instruction, and move it into the HLO verifier instead.

CheckFusionNode() is linear in the size of the fusion node, and was called once per Fuse(), leading to run-time quadratic in the fusion node's size.

PiperOrigin-RevId: 167812735

* Disable
tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py
in cmake.
---
 configure.py                                  |   3 +-
 tensorflow/BUILD                              |   2 +
 tensorflow/c/c_api.cc                         |  65 ++
 tensorflow/c/eager/c_api_test.cc              |   4 +-
 tensorflow/c/python_api.cc                    |   1 -
 tensorflow/cc/framework/gradients.cc          |   4 +-
 tensorflow/cc/framework/testutil.cc           |   2 +-
 tensorflow/cc/framework/testutil.h            |   2 +-
 tensorflow/compiler/xla/literal_util.cc       |   7 +-
 tensorflow/compiler/xla/literal_util.h        |   3 +
 tensorflow/compiler/xla/literal_util_test.cc  |  31 +-
 .../compiler/xla/service/cpu/cpu_compiler.cc  |   8 +-
 .../compiler/xla/service/cpu/ir_emitter.cc    |   7 +
 .../xla/service/gpu/ir_emitter_nested.cc      |   3 +
 .../xla/service/gpu/ir_emitter_unnested.cc    |   3 +
 .../xla/service/hlo_evaluator_test.cc         |  47 ++
 .../compiler/xla/service/hlo_instruction.cc   |  71 --
 .../compiler/xla/service/hlo_instruction.h    |   3 -
 .../compiler/xla/service/hlo_verifier.cc      | 120 +++
 .../compiler/xla/service/hlo_verifier.h       |   3 +
 .../reduce_precision_insertion_test.cc        |   4 -
 .../compiler/xla/service/user_computation.cc  |   4 +-
 .../xla/tests/multioutput_fusion_test.cc      |   2 +-
 tensorflow/contrib/BUILD                      |   1 +
 tensorflow/contrib/__init__.py                |   1 +
 tensorflow/contrib/cmake/tf_python.cmake      |  45 ++
 tensorflow/contrib/cmake/tf_tests.cmake       |   2 +
 .../cudnn_rnn/python/ops/cudnn_rnn_ops.py     |   1 +
 tensorflow/contrib/data/BUILD                 |   1 +
 tensorflow/contrib/data/__init__.py           |   3 +
 .../contrib/data/python/kernel_tests/BUILD    |  21 +-
 .../kernel_tests/map_dataset_op_test.py       |   5 +-
 .../sloppy_transformation_dataset_op_test.py  | 475 ++++++++++++
 tensorflow/contrib/data/python/ops/BUILD      |  15 +
 .../contrib/data/python/ops/sloppy_ops.py     | 120 +++
 tensorflow/contrib/eager/python/BUILD         |  25 +
 tensorflow/contrib/eager/python/saver.py      | 122 +++
 tensorflow/contrib/eager/python/saver_test.py |  88 +++
 tensorflow/contrib/eager/python/tfe.py        |   3 +
 tensorflow/contrib/estimator/BUILD            |  61 ++
 tensorflow/contrib/estimator/__init__.py      |  29 +
 .../estimator/python/estimator/extenders.py   | 124 ++++
 .../python/estimator/extenders_test.py        | 135 ++++
 tensorflow/contrib/fused_conv/BUILD           |  13 +-
 .../fused_conv2d_bias_activation_op.cc        | 698 ++++++++++--------
 .../kernels/fused_conv2d_bias_activation_op.h |  31 +-
 .../fused_conv/kernels/fused_conv_ops_gpu.h   |  74 ++
 .../ops/fused_conv2d_bias_activation_op.cc    |  77 +-
 .../ops/fused_conv2d_bias_activation_op.py    | 107 +--
 .../fused_conv2d_bias_activation_op_test.py   | 289 +++++++-
 tensorflow/contrib/keras/BUILD                | 637 +---------------
 tensorflow/contrib/keras/README.md            |   3 +
 .../keras/api/keras/activations/__init__.py   |  26 +-
 .../applications/inception_v3/__init__.py     |   6 +-
 .../keras/applications/mobilenet/__init__.py  |   6 +-
 .../keras/applications/resnet50/__init__.py   |   6 +-
 .../api/keras/applications/vgg16/__init__.py  |   6 +-
 .../api/keras/applications/vgg19/__init__.py  |   6 +-
 .../keras/applications/xception/__init__.py   |   6 +-
 .../keras/api/keras/backend/__init__.py       | 276 +++----
 .../keras/api/keras/callbacks/__init__.py     |  26 +-
 .../keras/api/keras/constraints/__init__.py   |  24 +-
 .../keras/datasets/boston_housing/__init__.py |   2 +-
 .../api/keras/datasets/cifar10/__init__.py    |   2 +-
 .../api/keras/datasets/cifar100/__init__.py   |   2 +-
 .../keras/api/keras/datasets/imdb/__init__.py |   4 +-
 .../api/keras/datasets/mnist/__init__.py      |   2 +-
 .../api/keras/datasets/reuters/__init__.py    |   4 +-
 .../keras/api/keras/initializers/__init__.py  |  38 +-
 .../keras/api/keras/layers/__init__.py        | 184 ++---
 .../keras/api/keras/losses/__init__.py        |  34 +-
 .../keras/api/keras/metrics/__init__.py       |  38 +-
 .../keras/api/keras/models/__init__.py        |  14 +-
 .../keras/api/keras/optimizers/__init__.py    |  22 +-
 .../api/keras/preprocessing/image/__init__.py |  28 +-
 .../keras/preprocessing/sequence/__init__.py  |   6 +-
 .../api/keras/preprocessing/text/__init__.py  |   6 +-
 .../keras/api/keras/regularizers/__init__.py  |  16 +-
 .../contrib/keras/api/keras/utils/__init__.py |  30 +-
 .../keras/wrappers/scikit_learn/__init__.py   |   4 +-
 .../contrib/keras/python/keras/__init__.py    |  40 -
 .../keras/python/keras/layers/__init__.py     |  40 -
 .../keras/python/keras/utils/__init__.py      |  43 --
 .../keras/python/keras/utils/io_utils_test.py | 101 ---
 tensorflow/contrib/learn/BUILD                |  53 --
 .../learn/estimators/dnn_benchmark_test.py    | 257 -------
 .../dnn_linear_combined_benchmark_test.py     | 224 ------
 .../learn/python/learn/estimators/head.py     |   4 +-
 .../python/learn/estimators/rnn_common.py     |   2 +-
 .../python/ops/resampler_ops_test.py          |   2 +-
 tensorflow/contrib/session_bundle/BUILD       |   2 +-
 .../session_bundle/session_bundle_test.cc     |   3 +-
 tensorflow/contrib/summary/BUILD              |   5 +-
 tensorflow/contrib/summary/summary_ops.py     |  98 +--
 .../contrib/summary/summary_ops_test.py       |  27 +-
 .../kernels/v4/split_collection_operators.cc  |   7 +-
 .../tensorboard/plugins/trace/trace.py        |   4 +-
 .../contrib/tpu/profiler/op_profile.proto     |  12 +
 tensorflow/core/BUILD                         |   1 +
 .../simple_graph_execution_state.cc           |  13 -
 .../simple_graph_execution_state.h            |   9 -
 tensorflow/core/framework/common_shape_fns.cc | 188 +++--
 .../core/framework/common_shape_fns_test.cc   |  93 ++-
 tensorflow/core/framework/summary.proto       |   2 +-
 tensorflow/core/graph/mkl_layout_pass.cc      |   8 +-
 tensorflow/core/kernels/BUILD                 |  36 +-
 tensorflow/core/kernels/conv_ops_gpu.h        |  21 +-
 tensorflow/core/kernels/conv_ops_gpu_3.cu.cc  |   1 +
 tensorflow/core/kernels/crop_and_resize_op.cc | 579 +++++++++------
 tensorflow/core/kernels/crop_and_resize_op.h  |   8 +-
 .../core/kernels/crop_and_resize_op_gpu.cu.cc |   2 +-
 .../core/kernels/crop_and_resize_op_test.cc   |   4 +-
 tensorflow/core/kernels/cuda_solvers.h        |   3 +
 .../core/kernels/cuda_solvers_gpu.cu.cc       |  35 +-
 tensorflow/core/kernels/dataset_utils.cc      |  78 ++
 tensorflow/core/kernels/dataset_utils.h       |  35 +
 .../core/kernels/flat_map_dataset_op.cc       |  56 +-
 .../core/kernels/interleave_dataset_op.cc     |  62 +-
 .../core/kernels/mkl_conv_grad_input_ops.cc   |   6 +-
 tensorflow/core/kernels/mkl_conv_ops.cc       |  23 +-
 tensorflow/core/kernels/mkl_reshape_op.cc     |   1 +
 tensorflow/core/kernels/parse_tensor_op.cc    |  10 +-
 tensorflow/core/kernels/parse_tensor_test.cc  |  89 +--
 .../core/kernels/segment_reduction_ops.cc     |  10 +-
 .../kernels/segment_reduction_ops_gpu.cu.cc   |   8 +-
 .../kernels/sloppy_interleave_dataset_op.cc   | 370 ++++++++++
 tensorflow/core/kernels/summary_kernels.cc    |   7 +-
 .../core/lib/io/buffered_inputstream.cc       |  19 +
 tensorflow/core/lib/io/buffered_inputstream.h |   3 +
 .../core/lib/io/buffered_inputstream_test.cc  |  40 +
 tensorflow/core/lib/io/zlib_inputstream.h     |   2 +-
 tensorflow/core/ops/array_ops.cc              |  12 +-
 .../core/ops/compat/ops_history.v1.pbtxt      |  94 +++
 tensorflow/core/ops/dataset_ops.cc            |  27 +
 tensorflow/core/ops/debug_ops.cc              |   5 +
 tensorflow/core/ops/linalg_ops.cc             |  31 +-
 tensorflow/core/ops/ops.pbtxt                 |  71 ++
 tensorflow/core/platform/default/logging.h    |   2 +-
 tensorflow/core/platform/env_test.cc          |  16 +-
 tensorflow/core/util/activation_mode.cc       |   4 +-
 tensorflow/core/util/activation_mode.h        |   1 +
 .../docs_src/programmers_guide/datasets.md    |   6 +
 .../get_started/regression/dnn_regression.py  |  18 +-
 .../get_started/regression/imports85.py       | 170 ++++-
 .../regression/linear_regression.py           |  21 +-
 .../linear_regression_categorical.py          |  21 +-
 .../examples/get_started/regression/test.py   |  66 +-
 tensorflow/go/op/wrappers.go                  | 352 ++++-----
 tensorflow/java/BUILD                         |   3 +-
 tensorflow/java/src/gen/cc/op_gen_main.cc     |  26 +-
 tensorflow/java/src/gen/cc/op_generator.cc    |   8 +-
 tensorflow/java/src/gen/cc/op_generator.h     |   4 +-
 tensorflow/java/src/gen/gen_ops.bzl           |   8 +-
 tensorflow/python/BUILD                       |   7 +-
 tensorflow/python/__init__.py                 |   4 +-
 tensorflow/python/debug/BUILD                 |  34 +-
 tensorflow/python/debug/cli/analyzer_cli.py   |  16 +-
 tensorflow/python/debug/lib/debug_data.py     | 543 +++-----------
 .../python/debug/lib/debug_data_test.py       |  85 ---
 .../python/debug/lib/debug_gradients.py       |   5 +-
 tensorflow/python/debug/lib/debug_graphs.py   | 430 +++++++++++
 .../python/debug/lib/debug_graphs_test.py     | 112 +++
 .../python/debug/lib/grpc_debug_server.py     |  10 +-
 .../debug/lib/grpc_debug_test_server.py       |   3 +-
 .../debug/lib/session_debug_file_test.py      |   2 +-
 .../python/debug/lib/session_debug_testlib.py |   3 +-
 tensorflow/python/debug/lib/stepper.py        |   7 +-
 tensorflow/python/eager/backprop.py           |   2 +-
 tensorflow/python/eager/function.py           |   7 +
 .../python/eager/python_eager_op_gen.cc       |  14 +-
 tensorflow/python/estimator/model_fn.py       |   5 +-
 tensorflow/python/estimator/run_config.py     | 236 +++++-
 .../python/estimator/run_config_test.py       | 295 ++++++++
 .../python/feature_column/feature_column.py   |   6 +-
 .../feature_column/feature_column_test.py     |  14 +-
 tensorflow/python/framework/ops_test.py       |  18 +-
 .../python/framework/python_op_gen_main.cc    |   6 +-
 tensorflow/python/framework/tensor_util.py    |   6 +-
 .../python/framework/tensor_util_test.py      |   4 +-
 tensorflow/python/framework/test_util.py      |   8 +-
 tensorflow/python/keras/BUILD                 | 694 +++++++++++++++++
 tensorflow/python/keras/README.md             |   6 +
 tensorflow/python/keras/__init__.py           |  47 ++
 .../python/keras/_impl/keras/__init__.py      |  40 +
 .../keras/_impl}/keras/activations.py         |   6 +-
 .../keras/_impl}/keras/activations_test.py    |   2 +-
 .../_impl}/keras/applications/__init__.py     |  12 +-
 .../keras/applications/imagenet_utils.py      |   4 +-
 .../keras/applications/imagenet_utils_test.py |   2 +-
 .../_impl}/keras/applications/inception_v3.py |  32 +-
 .../keras/applications/inception_v3_test.py   |   2 +-
 .../_impl}/keras/applications/mobilenet.py    |  38 +-
 .../keras/applications/mobilenet_test.py      |   2 +-
 .../_impl}/keras/applications/resnet50.py     |  36 +-
 .../keras/applications/resnet50_test.py       |   2 +-
 .../keras/_impl}/keras/applications/vgg16.py  |  30 +-
 .../_impl}/keras/applications/vgg16_test.py   |   2 +-
 .../keras/_impl}/keras/applications/vgg19.py  |  30 +-
 .../_impl}/keras/applications/vgg19_test.py   |   2 +-
 .../_impl}/keras/applications/xception.py     |  32 +-
 .../keras/applications/xception_test.py       |   2 +-
 .../keras/_impl}/keras/backend.py             |   0
 .../keras/_impl}/keras/backend_test.py        |   3 +-
 .../keras/_impl}/keras/callbacks.py           |  57 +-
 .../keras/_impl}/keras/callbacks_test.py      |  11 +-
 .../keras/_impl}/keras/constraints.py         |   6 +-
 .../keras/_impl}/keras/constraints_test.py    |   2 +-
 .../keras/_impl}/keras/datasets/__init__.py   |  12 +-
 .../_impl}/keras/datasets/boston_housing.py   |   2 +-
 .../keras/_impl}/keras/datasets/cifar.py      |   0
 .../keras/_impl}/keras/datasets/cifar10.py    |   6 +-
 .../keras/_impl}/keras/datasets/cifar100.py   |   6 +-
 .../keras/_impl}/keras/datasets/imdb.py       |   2 +-
 .../keras/_impl}/keras/datasets/mnist.py      |   2 +-
 .../keras/_impl}/keras/datasets/reuters.py    |   2 +-
 .../keras/_impl}/keras/engine/__init__.py     |  12 +-
 .../keras/_impl}/keras/engine/topology.py     |  16 +-
 .../_impl}/keras/engine/topology_test.py      |   2 +-
 .../keras/_impl}/keras/engine/training.py     |  20 +-
 .../_impl}/keras/engine/training_test.py      |   6 +-
 .../keras/_impl}/keras/initializers.py        |   4 +-
 .../keras/_impl}/keras/initializers_test.py   |   2 +-
 .../keras/_impl}/keras/integration_test.py    |   4 +-
 .../keras/_impl/keras/layers/__init__.py      |  40 +
 .../keras/layers/advanced_activations.py      |  12 +-
 .../keras/layers/advanced_activations_test.py |   4 +-
 .../_impl}/keras/layers/convolutional.py      |  30 +-
 .../keras/layers/convolutional_recurrent.py   |  16 +-
 .../layers/convolutional_recurrent_test.py    |   4 +-
 .../_impl}/keras/layers/convolutional_test.py |   4 +-
 .../keras/_impl}/keras/layers/core.py         |  22 +-
 .../keras/_impl}/keras/layers/core_test.py    |   4 +-
 .../keras/_impl}/keras/layers/embeddings.py   |  10 +-
 .../_impl}/keras/layers/embeddings_test.py    |   4 +-
 .../keras/_impl}/keras/layers/gru_test.py     |   4 +-
 .../keras/_impl}/keras/layers/local.py        |  17 +-
 .../keras/_impl}/keras/layers/local_test.py   |   4 +-
 .../keras/_impl}/keras/layers/lstm_test.py    |   4 +-
 .../keras/_impl}/keras/layers/merge.py        |   4 +-
 .../keras/_impl}/keras/layers/merge_test.py   |   2 +-
 .../keras/_impl}/keras/layers/noise.py        |   4 +-
 .../keras/_impl}/keras/layers/noise_test.py   |   4 +-
 .../_impl}/keras/layers/normalization.py      |  10 +-
 .../_impl}/keras/layers/normalization_test.py |   4 +-
 .../keras/_impl}/keras/layers/pooling.py      |   8 +-
 .../keras/_impl}/keras/layers/pooling_test.py |   4 +-
 .../keras/_impl}/keras/layers/recurrent.py    |  16 +-
 .../_impl}/keras/layers/serialization.py      |  32 +-
 .../_impl}/keras/layers/serialization_test.py |   2 +-
 .../_impl}/keras/layers/simplernn_test.py     |   4 +-
 .../keras/_impl}/keras/layers/wrappers.py     |  10 +-
 .../_impl}/keras/layers/wrappers_test.py      |   2 +-
 .../keras/_impl}/keras/losses.py              |   4 +-
 .../keras/_impl}/keras/losses_test.py         |   2 +-
 .../keras/_impl}/keras/metrics.py             |  30 +-
 .../keras/_impl}/keras/metrics_test.py        |   2 +-
 .../keras/_impl}/keras/models.py              |  23 +-
 .../keras/_impl}/keras/models_test.py         |   2 +-
 .../keras/_impl}/keras/optimizers.py          |   6 +-
 .../keras/_impl}/keras/optimizers_test.py     |   4 +-
 .../_impl}/keras/preprocessing/__init__.py    |   6 +-
 .../keras/_impl}/keras/preprocessing/image.py |   2 +-
 .../_impl}/keras/preprocessing/image_test.py  |   2 +-
 .../_impl}/keras/preprocessing/sequence.py    |   0
 .../keras/preprocessing/sequence_test.py      |   2 +-
 .../keras/_impl}/keras/preprocessing/text.py  |   0
 .../_impl}/keras/preprocessing/text_test.py   |   2 +-
 .../keras/_impl}/keras/regularizers.py        |   6 +-
 .../keras/_impl}/keras/regularizers_test.py   |   4 +-
 .../keras/_impl}/keras/testing_utils.py       |   2 +-
 .../keras/_impl/keras/utils/__init__.py       |  43 ++
 .../keras/_impl}/keras/utils/conv_utils.py    |   2 +-
 .../keras/_impl}/keras/utils/data_utils.py    |   2 +-
 .../_impl}/keras/utils/data_utils_test.py     |   2 +-
 .../keras/_impl}/keras/utils/generic_utils.py |   0
 .../_impl}/keras/utils/generic_utils_test.py  |   2 +-
 .../keras/_impl}/keras/utils/io_utils.py      |   0
 .../keras/_impl/keras/utils/io_utils_test.py  | 100 +++
 .../keras/_impl}/keras/utils/layer_utils.py   |   4 +-
 .../keras/_impl}/keras/utils/np_utils.py      |   0
 .../keras/_impl}/keras/utils/vis_utils.py     |   4 +-
 .../keras/_impl}/keras/wrappers/__init__.py   |   2 +-
 .../_impl}/keras/wrappers/scikit_learn.py     |   4 +-
 .../keras/wrappers/scikit_learn_test.py       |   4 +-
 .../python/keras/activations/__init__.py      |  41 +
 .../python/keras/applications/__init__.py     |  36 +
 .../applications/inception_v3/__init__.py     |  27 +
 .../keras/applications/mobilenet/__init__.py  |  27 +
 .../keras/applications/resnet50/__init__.py   |  27 +
 .../keras/applications/vgg16/__init__.py      |  27 +
 .../keras/applications/vgg19/__init__.py      |  27 +
 .../keras/applications/xception/__init__.py   |  27 +
 tensorflow/python/keras/backend/__init__.py   | 163 ++++
 tensorflow/python/keras/callbacks/__init__.py |  37 +
 .../python/keras/constraints/__init__.py      |  40 +
 tensorflow/python/keras/datasets/__init__.py  |  30 +
 .../keras/datasets/boston_housing/__init__.py |  25 +
 .../python/keras/datasets/cifar10/__init__.py |  25 +
 .../keras/datasets/cifar100/__init__.py       |  25 +
 .../python/keras/datasets/imdb/__init__.py    |  26 +
 .../python/keras/datasets/mnist/__init__.py   |  25 +
 .../python/keras/datasets/reuters/__init__.py |  26 +
 .../python/keras/initializers/__init__.py     |  49 ++
 tensorflow/python/keras/layers/__init__.py    | 148 ++++
 tensorflow/python/keras/losses/__init__.py    |  45 ++
 tensorflow/python/keras/metrics/__init__.py   |  47 ++
 tensorflow/python/keras/models/__init__.py    |  31 +
 .../python/keras/optimizers/__init__.py       |  39 +
 .../python/keras/preprocessing/__init__.py    |  27 +
 .../keras/preprocessing/image/__init__.py     |  38 +
 .../keras/preprocessing/sequence/__init__.py  |  27 +
 .../keras/preprocessing/text/__init__.py      |  27 +
 .../python/keras/regularizers/__init__.py     |  38 +
 tensorflow/python/keras/utils/__init__.py     |  39 +
 tensorflow/python/keras/wrappers/__init__.py  |  25 +
 .../keras/wrappers/scikit_learn/__init__.py   |  26 +
 .../segment_reduction_ops_test.py             |  44 +-
 .../python/kernel_tests/sparse_ops_test.py    |  17 +
 tensorflow/python/layers/normalization.py     |  35 +-
 tensorflow/python/ops/metrics_impl.py         |  10 +-
 tensorflow/python/ops/parsing_ops.py          |   9 +-
 .../python/ops/resource_variable_ops.py       |  64 +-
 tensorflow/python/ops/sparse_ops.py           |   2 +-
 tensorflow/python/summary/text_summary.py     |  12 +-
 .../python/summary/writer/writer_test.py      |   5 +-
 .../python/training/monitored_session.py      |  20 +-
 .../python/training/monitored_session_test.py |  28 +
 tensorflow/python/training/saver_test.py      |  47 +-
 tensorflow/python/training/training_util.py   |   3 +-
 tensorflow/python/util/tf_should_use.py       |  78 +-
 tensorflow/python/util/tf_should_use_test.py  |  12 +
 tensorflow/stream_executor/cuda/cuda_dnn.cc   | 445 ++++++++---
 tensorflow/stream_executor/cuda/cuda_dnn.h    | 108 ++-
 tensorflow/stream_executor/dnn.h              | 139 +++-
 tensorflow/stream_executor/stream.cc          | 206 ++++--
 tensorflow/stream_executor/stream.h           |  98 ++-
 .../golden/tensorflow.keras.activations.pbtxt |  55 ++
 ...flow.keras.applications.inception_v3.pbtxt |  15 +
 ...sorflow.keras.applications.mobilenet.pbtxt |  15 +
 .../tensorflow.keras.applications.pbtxt       |  51 ++
 ...nsorflow.keras.applications.resnet50.pbtxt |  15 +
 .../tensorflow.keras.applications.vgg16.pbtxt |  15 +
 .../tensorflow.keras.applications.vgg19.pbtxt |  15 +
 ...nsorflow.keras.applications.xception.pbtxt |  15 +
 .../api/golden/tensorflow.keras.backend.pbtxt | 555 ++++++++++++++
 ...sorflow.keras.callbacks.-base-logger.pbtxt |  42 ++
 ...orflow.keras.callbacks.-c-s-v-logger.pbtxt |  42 ++
 ...tensorflow.keras.callbacks.-callback.pbtxt |  41 +
 ...flow.keras.callbacks.-early-stopping.pbtxt |  42 ++
 .../tensorflow.keras.callbacks.-history.pbtxt |  42 ++
 ...low.keras.callbacks.-lambda-callback.pbtxt |  42 ++
 ...s.callbacks.-learning-rate-scheduler.pbtxt |  42 ++
 ...ow.keras.callbacks.-model-checkpoint.pbtxt |  42 ++
 ...flow.keras.callbacks.-progbar-logger.pbtxt |  42 ++
 ...ras.callbacks.-reduce-l-r-on-plateau.pbtxt |  46 ++
 ...flow.keras.callbacks.-remote-monitor.pbtxt |  42 ++
 ...orflow.keras.callbacks.-tensor-board.pbtxt |  42 ++
 ...w.keras.callbacks.-terminate-on-na-n.pbtxt |  42 ++
 .../golden/tensorflow.keras.callbacks.pbtxt   |  55 ++
 ...orflow.keras.constraints.-constraint.pbtxt |  12 +
 ...nsorflow.keras.constraints.-max-norm.pbtxt |  14 +
 ...flow.keras.constraints.-min-max-norm.pbtxt |  14 +
 ...ensorflow.keras.constraints.-non-neg.pbtxt |  13 +
 ...sorflow.keras.constraints.-unit-norm.pbtxt |  14 +
 ...ensorflow.keras.constraints.max_norm.pbtxt |  14 +
 ...rflow.keras.constraints.min_max_norm.pbtxt |  14 +
 ...tensorflow.keras.constraints.non_neg.pbtxt |  13 +
 .../golden/tensorflow.keras.constraints.pbtxt |  51 ++
 ...nsorflow.keras.constraints.unit_norm.pbtxt |  14 +
 ...orflow.keras.datasets.boston_housing.pbtxt |   7 +
 .../tensorflow.keras.datasets.cifar10.pbtxt   |   7 +
 .../tensorflow.keras.datasets.cifar100.pbtxt  |   7 +
 .../tensorflow.keras.datasets.imdb.pbtxt      |  11 +
 .../tensorflow.keras.datasets.mnist.pbtxt     |   7 +
 .../golden/tensorflow.keras.datasets.pbtxt    |  27 +
 .../tensorflow.keras.datasets.reuters.pbtxt   |  11 +
 ...sorflow.keras.initializers.-constant.pbtxt |  18 +
 ...sorflow.keras.initializers.-identity.pbtxt |  18 +
 ...flow.keras.initializers.-initializer.pbtxt |  16 +
 .../tensorflow.keras.initializers.-ones.pbtxt |  18 +
 ...rflow.keras.initializers.-orthogonal.pbtxt |  18 +
 ...ow.keras.initializers.-random-normal.pbtxt |  18 +
 ...w.keras.initializers.-random-uniform.pbtxt |  18 +
 ...keras.initializers.-truncated-normal.pbtxt |  18 +
 ...keras.initializers.-variance-scaling.pbtxt |  18 +
 ...tensorflow.keras.initializers.-zeros.pbtxt |  18 +
 .../tensorflow.keras.initializers.pbtxt       |  79 ++
 .../tensorflow.keras.layers.-activation.pbtxt | 159 ++++
 ...eras.layers.-activity-regularization.pbtxt | 159 ++++
 .../golden/tensorflow.keras.layers.-add.pbtxt | 160 ++++
 ...nsorflow.keras.layers.-alpha-dropout.pbtxt | 159 ++++
 ...low.keras.layers.-average-pooling1-d.pbtxt | 161 ++++
 ...low.keras.layers.-average-pooling2-d.pbtxt | 161 ++++
 ...low.keras.layers.-average-pooling3-d.pbtxt | 161 ++++
 .../tensorflow.keras.layers.-average.pbtxt    | 160 ++++
 ...tensorflow.keras.layers.-avg-pool1-d.pbtxt | 161 ++++
 ...tensorflow.keras.layers.-avg-pool2-d.pbtxt | 161 ++++
 ...tensorflow.keras.layers.-avg-pool3-d.pbtxt | 161 ++++
 ...ow.keras.layers.-batch-normalization.pbtxt | 160 ++++
 ...nsorflow.keras.layers.-bidirectional.pbtxt | 172 +++++
 ...tensorflow.keras.layers.-concatenate.pbtxt | 160 ++++
 ...orflow.keras.layers.-conv-l-s-t-m2-d.pbtxt | 189 +++++
 .../tensorflow.keras.layers.-conv1-d.pbtxt    | 161 ++++
 ...flow.keras.layers.-conv2-d-transpose.pbtxt | 162 ++++
 .../tensorflow.keras.layers.-conv2-d.pbtxt    | 161 ++++
 ...flow.keras.layers.-conv3-d-transpose.pbtxt | 161 ++++
 .../tensorflow.keras.layers.-conv3-d.pbtxt    | 161 ++++
 ...sorflow.keras.layers.-convolution1-d.pbtxt | 161 ++++
 ...ras.layers.-convolution2-d-transpose.pbtxt | 162 ++++
 ...sorflow.keras.layers.-convolution2-d.pbtxt | 161 ++++
 ...ras.layers.-convolution3-d-transpose.pbtxt | 161 ++++
 ...sorflow.keras.layers.-convolution3-d.pbtxt | 161 ++++
 ...tensorflow.keras.layers.-cropping1-d.pbtxt | 159 ++++
 ...tensorflow.keras.layers.-cropping2-d.pbtxt | 159 ++++
 ...tensorflow.keras.layers.-cropping3-d.pbtxt | 159 ++++
 .../tensorflow.keras.layers.-dense.pbtxt      | 160 ++++
 .../golden/tensorflow.keras.layers.-dot.pbtxt | 160 ++++
 .../tensorflow.keras.layers.-dropout.pbtxt    | 160 ++++
 .../tensorflow.keras.layers.-e-l-u.pbtxt      | 159 ++++
 .../tensorflow.keras.layers.-embedding.pbtxt  | 159 ++++
 .../tensorflow.keras.layers.-flatten.pbtxt    | 159 ++++
 .../tensorflow.keras.layers.-g-r-u.pbtxt      | 180 +++++
 ...rflow.keras.layers.-gaussian-dropout.pbtxt | 159 ++++
 ...sorflow.keras.layers.-gaussian-noise.pbtxt | 159 ++++
 ...as.layers.-global-average-pooling1-d.pbtxt | 160 ++++
 ...as.layers.-global-average-pooling2-d.pbtxt | 160 ++++
 ...as.layers.-global-average-pooling3-d.pbtxt | 160 ++++
 ...low.keras.layers.-global-avg-pool1-d.pbtxt | 160 ++++
 ...low.keras.layers.-global-avg-pool2-d.pbtxt | 160 ++++
 ...low.keras.layers.-global-avg-pool3-d.pbtxt | 160 ++++
 ...low.keras.layers.-global-max-pool1-d.pbtxt | 160 ++++
 ...low.keras.layers.-global-max-pool2-d.pbtxt | 160 ++++
 ...low.keras.layers.-global-max-pool3-d.pbtxt | 160 ++++
 ....keras.layers.-global-max-pooling1-d.pbtxt | 160 ++++
 ....keras.layers.-global-max-pooling2-d.pbtxt | 160 ++++
 ....keras.layers.-global-max-pooling3-d.pbtxt | 160 ++++
 ...tensorflow.keras.layers.-input-layer.pbtxt | 160 ++++
 .../tensorflow.keras.layers.-input-spec.pbtxt |   9 +
 .../tensorflow.keras.layers.-l-s-t-m.pbtxt    | 180 +++++
 .../tensorflow.keras.layers.-lambda.pbtxt     | 159 ++++
 .../tensorflow.keras.layers.-layer.pbtxt      | 158 ++++
 ...ensorflow.keras.layers.-leaky-re-l-u.pbtxt | 159 ++++
 ...w.keras.layers.-locally-connected1-d.pbtxt | 159 ++++
 ...w.keras.layers.-locally-connected2-d.pbtxt | 159 ++++
 .../tensorflow.keras.layers.-masking.pbtxt    | 159 ++++
 ...tensorflow.keras.layers.-max-pool1-d.pbtxt | 161 ++++
 ...tensorflow.keras.layers.-max-pool2-d.pbtxt | 161 ++++
 ...tensorflow.keras.layers.-max-pool3-d.pbtxt | 161 ++++
 ...sorflow.keras.layers.-max-pooling1-d.pbtxt | 161 ++++
 ...sorflow.keras.layers.-max-pooling2-d.pbtxt | 161 ++++
 ...sorflow.keras.layers.-max-pooling3-d.pbtxt | 161 ++++
 .../tensorflow.keras.layers.-maximum.pbtxt    | 160 ++++
 .../tensorflow.keras.layers.-multiply.pbtxt   | 160 ++++
 .../tensorflow.keras.layers.-p-re-l-u.pbtxt   | 159 ++++
 .../tensorflow.keras.layers.-permute.pbtxt    | 159 ++++
 ...nsorflow.keras.layers.-repeat-vector.pbtxt | 159 ++++
 .../tensorflow.keras.layers.-reshape.pbtxt    | 159 ++++
 ...flow.keras.layers.-separable-conv2-d.pbtxt | 162 ++++
 ...ras.layers.-separable-convolution2-d.pbtxt | 162 ++++
 ...ensorflow.keras.layers.-simple-r-n-n.pbtxt | 180 +++++
 ...low.keras.layers.-spatial-dropout1-d.pbtxt | 161 ++++
 ...low.keras.layers.-spatial-dropout2-d.pbtxt | 161 ++++
 ...low.keras.layers.-spatial-dropout3-d.pbtxt | 161 ++++
 ...low.keras.layers.-thresholded-re-l-u.pbtxt | 159 ++++
 ...rflow.keras.layers.-time-distributed.pbtxt | 168 +++++
 ...sorflow.keras.layers.-up-sampling1-d.pbtxt | 159 ++++
 ...sorflow.keras.layers.-up-sampling2-d.pbtxt | 159 ++++
 ...sorflow.keras.layers.-up-sampling3-d.pbtxt | 159 ++++
 .../tensorflow.keras.layers.-wrapper.pbtxt    | 167 +++++
 ...orflow.keras.layers.-zero-padding1-d.pbtxt | 159 ++++
 ...orflow.keras.layers.-zero-padding2-d.pbtxt | 159 ++++
 ...orflow.keras.layers.-zero-padding3-d.pbtxt | 159 ++++
 .../api/golden/tensorflow.keras.layers.pbtxt  | 371 ++++++++++
 .../api/golden/tensorflow.keras.losses.pbtxt  |  71 ++
 .../api/golden/tensorflow.keras.metrics.pbtxt |  79 ++
 .../tensorflow.keras.models.-model.pbtxt      | 249 +++++++
 .../tensorflow.keras.models.-sequential.pbtxt | 274 +++++++
 .../api/golden/tensorflow.keras.models.pbtxt  |  31 +
 ...ensorflow.keras.optimizers.-adadelta.pbtxt |  34 +
 ...tensorflow.keras.optimizers.-adagrad.pbtxt |  34 +
 .../tensorflow.keras.optimizers.-adam.pbtxt   |  34 +
 .../tensorflow.keras.optimizers.-adamax.pbtxt |  34 +
 .../tensorflow.keras.optimizers.-nadam.pbtxt  |  34 +
 ...nsorflow.keras.optimizers.-optimizer.pbtxt |  33 +
 ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt |  34 +
 .../tensorflow.keras.optimizers.-s-g-d.pbtxt  |  34 +
 .../golden/tensorflow.keras.optimizers.pbtxt  |  47 ++
 .../tools/api/golden/tensorflow.keras.pbtxt   |  71 ++
 ...processing.image.-directory-iterator.pbtxt |  18 +
 ...ocessing.image.-image-data-generator.pbtxt |  29 +
 ....keras.preprocessing.image.-iterator.pbtxt |  13 +
 ...ocessing.image.-numpy-array-iterator.pbtxt |  18 +
 ...tensorflow.keras.preprocessing.image.pbtxt |  59 ++
 .../tensorflow.keras.preprocessing.pbtxt      |  15 +
 ...sorflow.keras.preprocessing.sequence.pbtxt |  15 +
 ....keras.preprocessing.text.-tokenizer.pbtxt |  33 +
 .../tensorflow.keras.preprocessing.text.pbtxt |  15 +
 ...tensorflow.keras.regularizers.-l1-l2.pbtxt |  18 +
 ...flow.keras.regularizers.-regularizer.pbtxt |  12 +
 .../tensorflow.keras.regularizers.pbtxt       |  35 +
 ...low.keras.utils.-custom-object-scope.pbtxt |   9 +
 ...flow.keras.utils.-generator-enqueuer.pbtxt |  26 +
 ...ensorflow.keras.utils.-h-d-f5-matrix.pbtxt |  29 +
 .../tensorflow.keras.utils.-progbar.pbtxt     |  17 +
 ...rflow.keras.utils.-sequence-enqueuer.pbtxt |  24 +
 .../tensorflow.keras.utils.-sequence.pbtxt    |  12 +
 .../api/golden/tensorflow.keras.utils.pbtxt   |  63 ++
 .../golden/tensorflow.keras.wrappers.pbtxt    |   7 +
 ...ppers.scikit_learn.-keras-classifier.pbtxt |  42 ++
 ...appers.scikit_learn.-keras-regressor.pbtxt |  38 +
 ...nsorflow.keras.wrappers.scikit_learn.pbtxt |  11 +
 tensorflow/tools/api/golden/tensorflow.pbtxt  |   4 +
 tensorflow/tools/ci_build/ci_sanity.sh        |   2 +-
 .../tools/ci_build/linux/cpu/run_cc_core.sh   |   2 +-
 .../graph_transforms/remove_attribute.cc      |   2 +-
 515 files changed, 27182 insertions(+), 4704 deletions(-)
 create mode 100644 tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py
 create mode 100644 tensorflow/contrib/data/python/ops/sloppy_ops.py
 create mode 100644 tensorflow/contrib/eager/python/saver.py
 create mode 100644 tensorflow/contrib/eager/python/saver_test.py
 create mode 100644 tensorflow/contrib/estimator/BUILD
 create mode 100644 tensorflow/contrib/estimator/__init__.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/extenders.py
 create mode 100644 tensorflow/contrib/estimator/python/estimator/extenders_test.py
 create mode 100644 tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
 delete mode 100644 tensorflow/contrib/keras/python/keras/__init__.py
 delete mode 100644 tensorflow/contrib/keras/python/keras/layers/__init__.py
 delete mode 100644 tensorflow/contrib/keras/python/keras/utils/__init__.py
 delete mode 100644 tensorflow/contrib/keras/python/keras/utils/io_utils_test.py
 delete mode 100644 tensorflow/contrib/learn/python/learn/estimators/dnn_benchmark_test.py
 delete mode 100644 tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_benchmark_test.py
 create mode 100644 tensorflow/core/kernels/dataset_utils.cc
 create mode 100644 tensorflow/core/kernels/dataset_utils.h
 create mode 100644 tensorflow/core/kernels/sloppy_interleave_dataset_op.cc
 create mode 100644 tensorflow/python/debug/lib/debug_graphs.py
 create mode 100644 tensorflow/python/debug/lib/debug_graphs_test.py
 create mode 100644 tensorflow/python/keras/BUILD
 create mode 100644 tensorflow/python/keras/README.md
 create mode 100644 tensorflow/python/keras/__init__.py
 create mode 100644 tensorflow/python/keras/_impl/keras/__init__.py
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/activations.py (93%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/activations_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/__init__.py (64%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/imagenet_utils.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/imagenet_utils_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/inception_v3.py (91%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/inception_v3_test.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/mobilenet.py (95%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/mobilenet_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/resnet50.py (87%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/resnet50_test.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/vgg16.py (86%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/vgg16_test.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/vgg19.py (86%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/vgg19_test.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/xception.py (90%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/applications/xception_test.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/backend.py (100%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/backend_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/callbacks.py (94%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/callbacks_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/constraints.py (96%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/constraints_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/datasets/__init__.py (68%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/datasets/boston_housing.py (96%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/datasets/cifar.py (100%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/datasets/cifar10.py (89%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/datasets/cifar100.py (89%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/datasets/imdb.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/datasets/mnist.py (94%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/datasets/reuters.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/engine/__init__.py (67%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/engine/topology.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/engine/topology_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/engine/training.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/engine/training_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/initializers.py (96%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/initializers_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/integration_test.py (99%)
 create mode 100644 tensorflow/python/keras/_impl/keras/layers/__init__.py
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/advanced_activations.py (94%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/advanced_activations_test.py (94%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/convolutional.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/convolutional_recurrent.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/convolutional_recurrent_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/convolutional_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/core.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/core_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/embeddings.py (95%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/embeddings_test.py (95%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/gru_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/local.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/local_test.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/lstm_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/merge.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/merge_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/noise.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/noise_test.py (93%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/normalization.py (94%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/normalization_test.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/pooling.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/pooling_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/recurrent.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/serialization.py (59%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/serialization_test.py (96%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/simplernn_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/wrappers.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/layers/wrappers_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/losses.py (95%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/losses_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/metrics.py (67%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/metrics_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/models.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/models_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/optimizers.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/optimizers_test.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/preprocessing/__init__.py (79%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/preprocessing/image.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/preprocessing/image_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/preprocessing/sequence.py (100%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/preprocessing/sequence_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/preprocessing/text.py (100%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/preprocessing/text_test.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/regularizers.py (90%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/regularizers_test.py (95%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/testing_utils.py (99%)
 create mode 100644 tensorflow/python/keras/_impl/keras/utils/__init__.py
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/utils/conv_utils.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/utils/data_utils.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/utils/data_utils_test.py (99%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/utils/generic_utils.py (100%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/utils/generic_utils_test.py (97%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/utils/io_utils.py (100%)
 create mode 100644 tensorflow/python/keras/_impl/keras/utils/io_utils_test.py
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/utils/layer_utils.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/utils/np_utils.py (100%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/utils/vis_utils.py (95%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/wrappers/__init__.py (91%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/wrappers/scikit_learn.py (98%)
 rename tensorflow/{contrib/keras/python => python/keras/_impl}/keras/wrappers/scikit_learn_test.py (98%)
 create mode 100644 tensorflow/python/keras/activations/__init__.py
 create mode 100644 tensorflow/python/keras/applications/__init__.py
 create mode 100644 tensorflow/python/keras/applications/inception_v3/__init__.py
 create mode 100644 tensorflow/python/keras/applications/mobilenet/__init__.py
 create mode 100644 tensorflow/python/keras/applications/resnet50/__init__.py
 create mode 100644 tensorflow/python/keras/applications/vgg16/__init__.py
 create mode 100644 tensorflow/python/keras/applications/vgg19/__init__.py
 create mode 100644 tensorflow/python/keras/applications/xception/__init__.py
 create mode 100644 tensorflow/python/keras/backend/__init__.py
 create mode 100644 tensorflow/python/keras/callbacks/__init__.py
 create mode 100644 tensorflow/python/keras/constraints/__init__.py
 create mode 100644 tensorflow/python/keras/datasets/__init__.py
 create mode 100644 tensorflow/python/keras/datasets/boston_housing/__init__.py
 create mode 100644 tensorflow/python/keras/datasets/cifar10/__init__.py
 create mode 100644 tensorflow/python/keras/datasets/cifar100/__init__.py
 create mode 100644 tensorflow/python/keras/datasets/imdb/__init__.py
 create mode 100644 tensorflow/python/keras/datasets/mnist/__init__.py
 create mode 100644 tensorflow/python/keras/datasets/reuters/__init__.py
 create mode 100644 tensorflow/python/keras/initializers/__init__.py
 create mode 100644 tensorflow/python/keras/layers/__init__.py
 create mode 100644 tensorflow/python/keras/losses/__init__.py
 create mode 100644 tensorflow/python/keras/metrics/__init__.py
 create mode 100644 tensorflow/python/keras/models/__init__.py
 create mode 100644 tensorflow/python/keras/optimizers/__init__.py
 create mode 100644 tensorflow/python/keras/preprocessing/__init__.py
 create mode 100644 tensorflow/python/keras/preprocessing/image/__init__.py
 create mode 100644 tensorflow/python/keras/preprocessing/sequence/__init__.py
 create mode 100644 tensorflow/python/keras/preprocessing/text/__init__.py
 create mode 100644 tensorflow/python/keras/regularizers/__init__.py
 create mode 100644 tensorflow/python/keras/utils/__init__.py
 create mode 100644 tensorflow/python/keras/wrappers/__init__.py
 create mode 100644 tensorflow/python/keras/wrappers/scikit_learn/__init__.py
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.activations.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.applications.inception_v3.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.applications.mobilenet.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.applications.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.applications.resnet50.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.applications.vgg16.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.applications.vgg19.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.applications.xception.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.backend.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-base-logger.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-callback.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-early-stopping.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-history.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-lambda-callback.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-learning-rate-scheduler.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-progbar-logger.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-remote-monitor.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-tensor-board.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.callbacks.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.-constraint.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.-max-norm.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.-min-max-norm.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.-non-neg.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.-unit-norm.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.max_norm.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.min_max_norm.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.non_neg.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.constraints.unit_norm.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.datasets.boston_housing.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.datasets.cifar10.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.datasets.cifar100.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.datasets.imdb.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.datasets.mnist.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.datasets.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.datasets.reuters.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-constant.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-identity.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-initializer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-ones.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-orthogonal.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-random-normal.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-random-uniform.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-truncated-normal.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.-zeros.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-input-spec.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.layers.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.losses.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.models.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.optimizers.-adadelta.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.optimizers.-adagrad.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.optimizers.-adam.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.optimizers.-adamax.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.optimizers.-nadam.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.optimizers.-optimizer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.optimizers.-s-g-d.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.optimizers.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-directory-iterator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-image-data-generator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-iterator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.-numpy-array-iterator.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.image.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.sequence.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.text.-tokenizer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.preprocessing.text.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.regularizers.-l1-l2.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.regularizers.-regularizer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.regularizers.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.utils.-custom-object-scope.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.utils.-generator-enqueuer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.utils.-h-d-f5-matrix.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.utils.-progbar.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.utils.-sequence-enqueuer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.utils.-sequence.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.utils.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.wrappers.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.wrappers.scikit_learn.-keras-classifier.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.wrappers.scikit_learn.-keras-regressor.pbtxt
 create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.wrappers.scikit_learn.pbtxt

diff --git a/configure.py b/configure.py
index fcf359d061..ef5051d275 100644
--- a/configure.py
+++ b/configure.py
@@ -688,7 +688,8 @@ def set_tf_cunn_version(environ_cp):
                                            cudnn_path_from_ldconfig)
       if cudnn_path_from_ldconfig:
         cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1)
-        if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)):
+        if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig,
+                                     tf_cudnn_version)):
           cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig)
           break
 
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 5b6a18b6a6..5538052d02 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -290,6 +290,7 @@ filegroup(
         "//tensorflow/contrib/decision_trees/proto:all_files",
         "//tensorflow/contrib/distributions:all_files",
         "//tensorflow/contrib/eager/python:all_files",
+        "//tensorflow/contrib/estimator:all_files",
         "//tensorflow/contrib/factorization:all_files",
         "//tensorflow/contrib/factorization/kernels:all_files",
         "//tensorflow/contrib/ffmpeg:all_files",
@@ -407,6 +408,7 @@ filegroup(
         "//tensorflow/python/eager:all_files",
         "//tensorflow/python/estimator:all_files",
         "//tensorflow/python/feature_column:all_files",
+        "//tensorflow/python/keras:all_files",
         "//tensorflow/python/kernel_tests:all_files",
         "//tensorflow/python/kernel_tests/distributions:all_files",
         "//tensorflow/python/ops/distributions:all_files",
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index c454c94249..334f867e47 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -374,6 +374,65 @@ void TF_Reset_Helper(const TF_SessionOptions* opt, const char** containers,
   status->status = Reset(opt->options, container_names);
 }
 
+// This traverses the specified nodes in topological order to verify there are
+// no cycles. Starting with inputless nodes, it visits nodes whose inputs have
+// all been visited, and counts the total number of visited nodes. If there is a
+// cycle, nodes in the cycle will never be visited, and the visited count will
+// be less than the total node count.
+Status ValidateNoCycles(const Graph& g) {
+  // TODO(nolivia): check this on a subset of the graph instead of all of it.
+  int total_num_nodes = g.num_node_ids();
+  // A node is ready when all of its inputs have been visited.
+  std::vector ready;
+  std::vector pending_count(total_num_nodes, 0);
+
+  for (int i = 0; i < total_num_nodes; ++i) {
+    const Node* n = g.FindNodeId(i);
+    if (n == nullptr) continue;
+    pending_count[i] = n->in_edges().size();
+    if (n->IsMerge()) {
+      // While-loop cycles are legal cycles so we manually adjust the
+      // pending_count to make sure that the loop is visited.
+      for (const Edge* e : n->in_edges()) {
+        if (!e->IsControlEdge() && e->src()->IsNextIteration()) {
+          pending_count[i]--;
+        }
+      }
+    }
+    if (pending_count[i] == 0) {
+      ready.push_back(n);
+    }
+  }
+
+  int processed = 0;
+  while (!ready.empty()) {
+    const Node* node = ready.back();
+    ready.pop_back();
+    ++processed;
+
+    for (const Edge* out : node->out_edges()) {
+      const int output_id = out->dst()->id();
+      pending_count[output_id]--;
+      if (pending_count[output_id] == 0) {
+        ready.push_back(out->dst());
+      }
+    }
+  }
+
+  if (processed < total_num_nodes) {
+    std::vector nodes_in_cycle;
+    for (int i = 0; i < pending_count.size() && nodes_in_cycle.size() < 3;
+         ++i) {
+      if (pending_count[i] != 0) {
+        nodes_in_cycle.push_back(g.FindNodeId(i)->name());
+      }
+    }
+    return errors::InvalidArgument(
+        "Graph is invalid, contains a cycle with ", total_num_nodes - processed,
+        " nodes, including: ", str_util::Join(nodes_in_cycle, ", "));
+  }
+  return Status::OK();
+}
 }  // namespace
 }  // namespace tensorflow
 
@@ -2251,6 +2310,12 @@ static bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) {
     const Graph& graph = session->graph->graph;
     const auto num_nodes = graph.num_node_ids();
     if (session->last_num_graph_nodes < num_nodes) {
+      status->status = tensorflow::ValidateNoCycles(session->graph->graph);
+      if (!status->status.ok()) {
+        session->graph->mu.unlock();
+        return false;
+      }
+
       GraphDef graph_def;
       *graph_def.mutable_versions() = graph.versions();
       // Fill graph_def with nodes with ids in the range
diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc
index d19583a3ab..72e0fe8a15 100644
--- a/tensorflow/c/eager/c_api_test.cc
+++ b/tensorflow/c/eager/c_api_test.cc
@@ -38,6 +38,7 @@ TFE_TensorHandle* TestMatrixTensorHandle() {
   TFE_TensorHandle* th = TFE_NewTensorHandle(t, status);
   CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TF_DeleteTensor(t);
+  TF_DeleteStatus(status);
   return th;
 }
 
@@ -385,7 +386,8 @@ TFE_TensorHandle* CreateVariable(TFE_Context* ctx, float value,
   memcpy(TF_TensorData(t.get()), &value, TF_TensorByteSize(t.get()));
 
   std::unique_ptr
-      value_handle(TFE_NewTensorHandle(t.get(), status), TFE_DeleteTensorHandle);
+      value_handle(TFE_NewTensorHandle(t.get(), status),
+                   TFE_DeleteTensorHandle);
   if (TF_GetCode(status) != TF_OK) return nullptr;
 
   TFE_OpAddInput(op, value_handle.get(), status);
diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
index adca6c7625..b8d36b8947 100644
--- a/tensorflow/c/python_api.cc
+++ b/tensorflow/c/python_api.cc
@@ -20,7 +20,6 @@ limitations under the License.
 namespace tensorflow {
 
 void AddControlInput(TF_Graph* graph, TF_Operation* op, TF_Operation* input) {
-  // TODO(skyewm): make sure cycles are prevented
   mutex_lock l(graph->mu);
   graph->graph.AddControlEdge(&input->node, &op->node);
 }
diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc
index 1868207148..82469261e5 100644
--- a/tensorflow/cc/framework/gradients.cc
+++ b/tensorflow/cc/framework/gradients.cc
@@ -77,7 +77,7 @@ class SymbolicGradientBuilder {
   Status CallGradFunction(const Operation& op,
                           const std::vector& grad_inputs,
                           std::vector* grad_outputs);
-  
+
   // Returns a list mapping whether each node in the graph is reachable
   // from outputs_. Keyed by node id.
   std::vector GetReachableNodes();
@@ -156,7 +156,7 @@ std::vector SymbolicGradientBuilder::GetReachableNodes() {
       reachable_nodes[out.node()->id()] = true;
     }
   }
-  
+
   while (!queue.empty()) {
     Node* n = queue.front();
     queue.pop_front();
diff --git a/tensorflow/cc/framework/testutil.cc b/tensorflow/cc/framework/testutil.cc
index 25ee08f676..57d573e3c5 100644
--- a/tensorflow/cc/framework/testutil.cc
+++ b/tensorflow/cc/framework/testutil.cc
@@ -37,7 +37,7 @@ void GetTensor(const Scope& scope, Output tensor, Tensor* out) {
 }
 
 void GetTensors(const Scope& scope, const std::vector& assign_vars,
-                OutputList tensors, std::vector* out) {
+                const OutputList& tensors, std::vector* out) {
   ClientSession session(scope);
   TF_CHECK_OK(session.Run(assign_vars, nullptr));
   TF_CHECK_OK(session.Run(tensors, out));
diff --git a/tensorflow/cc/framework/testutil.h b/tensorflow/cc/framework/testutil.h
index ca57c0f0a4..a3e19870ec 100644
--- a/tensorflow/cc/framework/testutil.h
+++ b/tensorflow/cc/framework/testutil.h
@@ -30,7 +30,7 @@ void GetTensors(const Scope& scope, OutputList tensors,
 // assign_vars are extra outputs that should be run
 // e.g. to assign values to variables.
 void GetTensors(const Scope& scope, const std::vector& assign_vars,
-                OutputList tensors, std::vector* out);
+                const OutputList& tensors, std::vector* out);
 
 /// Computes the output 'tensor', returning the resulting tensor in 'out'.
 void GetTensor(const Scope& scope, Output tensor, Tensor* out);
diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc
index 71995b2307..6190bd624d 100644
--- a/tensorflow/compiler/xla/literal_util.cc
+++ b/tensorflow/compiler/xla/literal_util.cc
@@ -94,14 +94,17 @@ Status Literal::CopyRange(const Literal& src_literal,
 
   TF_RET_CHECK(ShapeUtil::Rank(src_shape) == src_base.size());
   TF_RET_CHECK(ShapeUtil::Rank(dest_shape) == dest_base.size());
+
   if (ShapeUtil::Rank(src_shape) == 0 || ShapeUtil::Rank(dest_shape) == 0) {
     // If any of the two shapes are scalars, we can just call the StridedCopy()
     // directly, and we know we will be copying only one value.
     TF_RET_CHECK(copy_size.empty());
     StridedCopy(dest_data, LinearIndex(dest_base), 0, src_data,
                 src_literal.LinearIndex(src_base), 0, 1);
-  } else if (!ShapeUtil::HasZeroElements(dest_shape)) {
-    TF_RET_CHECK(!ShapeUtil::HasZeroElements(src_shape));
+  } else if (!ShapeUtil::HasZeroElements(dest_shape) &&
+             !ShapeUtil::HasZeroElements(src_shape)) {
+    // Perform copy if neither src literal nor dest literal has dimensions with
+    // zero element, otherwise it's a no-op.
     TF_RET_CHECK(src_base.size() == dest_base.size());
     TF_RET_CHECK(src_base.size() == copy_size.size());
 
diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h
index 447c494bfc..6451345918 100644
--- a/tensorflow/compiler/xla/literal_util.h
+++ b/tensorflow/compiler/xla/literal_util.h
@@ -237,6 +237,9 @@ class Literal {
   // The src_literal and this literal must have the same primitive type,
   // src_base+copy_size must fit the source literal dimensions, as well as
   // dest_base+copy_size must fit the destination literal dimensions.
+  // Note: if either src_literal or this literal contains dimensions with zero
+  // element, then copy_size must be 0 in these dimensions while the
+  // corresponding base indices being 0.
   Status Copy(const Literal& src_literal,
               tensorflow::gtl::ArraySlice src_base,
               tensorflow::gtl::ArraySlice dest_base,
diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc
index a33c0fe09d..61ceac4f9a 100644
--- a/tensorflow/compiler/xla/literal_util_test.cc
+++ b/tensorflow/compiler/xla/literal_util_test.cc
@@ -698,7 +698,7 @@ TEST_F(LiteralUtilTest, Copy) {
   for (const auto& layout : layouts) {
     Shape shape = ShapeUtil::MakeShapeWithLayout(
         primitive_util::NativeToPrimitiveType(), dimensions, layout);
-    auto blank = Literal::CreateFromShape(shape);
+
     auto source = Literal::CreateFromShape(shape);
     const int64 zero_base[] = {0, 0, 0, 0};
     const int64 step[] = {1, 1, 1, 1};
@@ -707,15 +707,15 @@ TEST_F(LiteralUtilTest, Copy) {
       source->Set(indexes, ++seqnr);
       return true;
     };
-
     ShapeUtil::ForEachIndex(source->shape(), zero_base, dimensions, step,
                             init_proc);
 
+    auto blank = Literal::CreateFromShape(shape);
     const int64 src_base[] = {3, 1, 5, 7};
     const int64 dest_base[] = {6, 4, 12, 2};
     const int64 copy_size[] = {7, 8, 11, 9};
-
     TF_EXPECT_OK(blank->Copy(*source, src_base, dest_base, copy_size));
+
     std::vector source_indexes(TF_ARRAYSIZE(dimensions), 0);
     std::vector blank_indexes(TF_ARRAYSIZE(dimensions), 0);
     bool matched = true;
@@ -730,6 +730,7 @@ TEST_F(LiteralUtilTest, Copy) {
       matched = (bval != 0 && bval == source->Get(source_indexes));
       return matched;
     };
+
     ShapeUtil::ForEachIndex(source->shape(), zero_base, copy_size, step,
                             check_proc);
     EXPECT_TRUE(matched);
@@ -749,6 +750,30 @@ TEST_F(LiteralUtilTest, CopyScalars) {
   EXPECT_EQ(vect->Get({4}), 17);
 }
 
+TEST_F(LiteralUtilTest, CopyFromAndToZeroElement) {
+  const Shape empty_r1_shape = ShapeUtil::MakeShape(F32, {0});
+  const auto const_nine = Literal::CreateR1({9});
+  const auto const_empty = Literal::CreateFromShape(empty_r1_shape);
+
+  {
+    // Source contains dimension with zero elements.
+    const auto empty = Literal::CreateFromShape(empty_r1_shape);
+    auto nine = Literal::CreateR1({9});
+
+    TF_EXPECT_OK(nine->Copy(*empty, {0}, {0}, {0}));
+    EXPECT_TRUE(nine->Equal(*const_nine));
+  }
+
+  {
+    // Copy 0 element to destination with zero elements.
+    const auto empty = Literal::CreateFromShape(empty_r1_shape);
+    auto nine = Literal::CreateR1({9});
+
+    TF_EXPECT_OK(empty->Copy(*nine, {0}, {0}, {0}));
+    EXPECT_TRUE(empty->Equal(*const_empty));
+  }
+}
+
 TEST_F(LiteralUtilTest, F16) {
   // Verify that the internal data views are consistent and that they
   // are in little endian format
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 839fe48488..8c7c2aa70e 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -198,8 +198,8 @@ class CollectProfileCandidates : public DfsHloVisitorWithDefault {
     std::unordered_map hlo_to_profile_idx;
     CollectProfileCandidates profile_candidates_for_computation(
         &hlo_to_profile_idx);
-    TF_RETURN_IF_ERROR(computation->root_instruction()->Accept(
-        &profile_candidates_for_computation));
+    TF_RETURN_IF_ERROR(
+        computation->Accept(&profile_candidates_for_computation));
     return hlo_to_profile_idx;
   }
 
@@ -433,6 +433,10 @@ Status InitializeModuleHooks(
 
 StatusOr> CpuCompiler::Compile(
     std::unique_ptr module, se::StreamExecutor* stream_exec) {
+  const string timer_message =
+      "Compiling [" + module->name() + "] for CPU using JIT";
+  ScopedLoggingTimer compiling_timer(timer_message, 1);
+
   VLOG(1) << "Compiling: " << module->name();
   TF_RET_CHECK(stream_exec != nullptr);
   std::call_once(llvm_command_line_options_initialized,
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index c5275ede65..06c94e19de 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -240,6 +240,13 @@ void IrEmitter::InitializeIrFunction(const string& function_name) {
     compute_function_->addFnAttr(llvm::Attribute::OptimizeForSize);
   }
 
+  if (hlo_module_config_.debug_options().xla_enable_fast_math()) {
+    compute_function_->addFnAttr("unsafe-fp-math", "true");
+    compute_function_->addFnAttr("no-infs-fp-math", "true");
+    compute_function_->addFnAttr("no-nans-fp-math", "true");
+    compute_function_->addFnAttr("no-signed-zeros-fp-math", "true");
+  }
+
   ir_builder_.SetInsertPoint(llvm::BasicBlock::Create(
       /*Context=*/module_->getContext(),
       /*Name=*/"entry",
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
index 202a0171db..a40eb6afc2 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
@@ -87,6 +87,9 @@ llvm::Function* IrEmitterNested::EmitBasePointersForNestedComputation(
     }
   }
 
+  // TODO(b/65380986): Investigate if adding fast math flags for generated
+  // kernels makes sense.
+
   llvm::BasicBlock* entry_bb =
       llvm::BasicBlock::Create(function->getContext(), "entry", function);
   // Emit a "return void" at entry_bb's end, and sets the insert point before
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 749badf3f2..b84284046b 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -201,6 +201,9 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype(
   }
   kernel->addAttribute(temp_buffer_arg_no + 1, llvm::Attribute::NoAlias);
 
+  // TODO(b/65380986): Investigate if adding fast math flags for generated
+  // kernels makes sense.
+
   // Add the declaration of this kernel to llvm.nvvm.annotations so that NVPTX
   // treats it as a CUDA kernel.
   llvm::NamedMDNode* nvvm_annotations_node =
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index a826548349..9205f5dc4e 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -332,6 +332,53 @@ TEST_F(HloEvaluatorTest, DoesBroadcastScalar) {
   LiteralTestUtil::ExpectEqual(*result, *output_literal);
 }
 
+TEST_F(HloEvaluatorTest, DoesConcatenateSimple) {
+  HloComputation::Builder b(TestName());
+
+  HloInstruction* operand1 = b.AddInstruction(HloInstruction::CreateConstant(
+      Literal::CreateR2({{-1, -2}, {100, 200}})));
+  HloInstruction* operand2 = b.AddInstruction(HloInstruction::CreateConstant(
+      Literal::CreateR2({{-2, -3}, {-100, -200}})));
+
+  std::vector operands = {operand1, operand2};
+
+  Shape shape = ShapeUtil::MakeShape(S64, {2, 2});
+  b.AddInstruction(HloInstruction::CreateConcatenate(shape, operands, 0));
+
+  HloModule module(TestName());
+  auto computation = module.AddEntryComputation(b.Build());
+
+  std::unique_ptr result =
+      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+
+  auto expected =
+      Literal::CreateR2({{-1, -2}, {100, 200}, {-2, -3}, {-100, -200}});
+  LiteralTestUtil::ExpectEqual(*expected, *result);
+}
+
+TEST_F(HloEvaluatorTest, ConcatenateHandlesShapeWithZeroElement) {
+  HloComputation::Builder b(TestName());
+
+  HloInstruction* operand1 = b.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR1({100, 200})));
+  HloInstruction* operand2 = b.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR1({})));
+
+  std::vector operands = {operand1, operand2};
+
+  Shape shape = ShapeUtil::MakeShape(S64, {2});
+  b.AddInstruction(HloInstruction::CreateConcatenate(shape, operands, 0));
+
+  HloModule module(TestName());
+  auto computation = module.AddEntryComputation(b.Build());
+
+  std::unique_ptr result =
+      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+
+  auto expected = Literal::CreateR1({100, 200});
+  LiteralTestUtil::ExpectEqual(*expected, *result);
+}
+
 TEST_F(HloEvaluatorTest, ConvertWithSameLayout) {
   HloComputation::Builder b(TestName());
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 24ef4e09e7..ce9e0db77e 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -512,7 +512,6 @@ HloInstruction::CreateSelectAndScatter(
   instruction->set_parent(fused_root->parent());
   instruction->set_metadata(fused_root->metadata());
   instruction->CloneAndFuseInternal(fused_root);
-  instruction->CheckFusionInstruction();
   return instruction;
 }
 
@@ -636,7 +635,6 @@ HloInstruction* HloInstruction::FuseInstructionInternal(
   }
   HloInstruction* fused_instruction =
       CloneAndFuseInternal(instruction_to_fuse, add_output);
-  CheckFusionInstruction();
   return fused_instruction;
 }
 
@@ -822,74 +820,6 @@ bool HloInstruction::HasSideEffect() const {
   }
 }
 
-void HloInstruction::CheckFusionInstruction() const {
-  CHECK_EQ(opcode_, HloOpcode::kFusion);
-
-  // The parent fusion instruction of the fusion computation must be 'this'.
-  HloComputation* fused_computation = fused_instructions_computation();
-  CHECK_EQ(this, fused_computation->FusionInstruction());
-
-  // Fused root instruction and fused parameters must all be owned by the fusion
-  // computation.
-  bool root_owned = false;
-  const std::vector& fused_parameters_ = fused_parameters();
-  const HloInstruction* fused_root_ = fused_expression_root();
-  std::vector parameter_owned(fused_parameters_.size(), false);
-  for (auto& instruction : fused_computation->instructions()) {
-    if (fused_root_ == instruction.get()) {
-      CHECK(!root_owned);
-      root_owned = true;
-    }
-    for (int i = 0; i < fused_parameters_.size(); ++i) {
-      if (fused_parameters_[i] == instruction.get()) {
-        CHECK(!parameter_owned[i]);
-        parameter_owned[i] = true;
-      }
-    }
-  }
-  CHECK(root_owned);
-  // Make sure all the parameter_owned entries are set
-  for (int i = 0; i < parameter_owned.size(); i++) {
-    CHECK(parameter_owned[i]);
-  }
-
-  // Fused root must have no users.
-  CHECK_EQ(0, fused_root_->user_count());
-
-  // All uses of fused instructions must be in the fusion computation, and every
-  // non-root instruction must have at least one use.
-  for (auto& instruction : fused_instructions_computation()->instructions()) {
-    if (instruction.get() != fused_root_) {
-      CHECK_GT(instruction->user_count(), 0);
-      for (auto& user : instruction->users()) {
-        CHECK_EQ(fused_computation, user->parent());
-      }
-    }
-  }
-
-  // Fused parameter instructions must be numbered contiguously and match up
-  // (shapes compatible) with their respective operand.
-  CHECK_EQ(operands_.size(), fused_parameters_.size());
-  std::vector parameter_numbers(fused_parameters_.size(), false);
-  for (auto fused_param : fused_parameters_) {
-    int64 param_no = fused_param->parameter_number();
-    CHECK_GE(param_no, 0);
-    CHECK_LT(param_no, fused_parameters_.size());
-    CHECK(!parameter_numbers[param_no]);
-    parameter_numbers[param_no] = true;
-    CHECK(ShapeUtil::Compatible(fused_param->shape(),
-                                operands_[param_no]->shape()));
-  }
-  // Make sure all the parameter_numbers entries were seen
-  for (int i = 0; i < parameter_numbers.size(); i++) {
-    CHECK(parameter_numbers[i]);
-  }
-
-  // Operands must be distinct.
-  std::set operand_set(operands_.begin(), operands_.end());
-  CHECK_EQ(operand_set.size(), operands_.size());
-}
-
 /* static */ std::unique_ptr HloInstruction::CreateCall(
     const Shape& shape, tensorflow::gtl::ArraySlice operands,
     HloComputation* computation) {
@@ -1194,7 +1124,6 @@ std::unique_ptr HloInstruction::CloneFusionWithNewOperands(
           ->AddEmbeddedComputation(
               computation_builder.Build(FindOrDie(old_to_new, fused_root_))));
   new_instruction->set_parent(parent());
-  new_instruction->CheckFusionInstruction();
   return new_instruction;
 }
 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index ca6f27bd40..bd8b8ac9bd 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -900,9 +900,6 @@ class HloInstruction {
   // instruction to make it a bitcast.
   bool CouldBeBitcast() const;
 
-  // CHECKs various invariants of a fusion instruction.
-  void CheckFusionInstruction() const;
-
   // Get/Set the number of partitions per outer dimension (in order, starting
   // with outer-most dimension first). Currently used by the parallel cpu
   // backend to partition HLOs into parallel tasks.
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index c44be716cd..d40fceb076 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -130,6 +130,8 @@ class ShapeVerifier : public DfsHloVisitor {
   }
 
   Status HandleBroadcast(HloInstruction* broadcast) override {
+    TF_RET_CHECK(ShapeUtil::Rank(broadcast->operand(0)->shape()) ==
+                 broadcast->dimensions().size());
     return tensorflow::Status::OK();
   }
 
@@ -290,6 +292,123 @@ string ComputationsToString(
 
 }  // namespace
 
+Status HloVerifier::CheckFusionInstruction(HloInstruction* fusion) const {
+  // The parent fusion instruction of the fusion computation must be 'fusion'.
+  HloComputation* fused_computation = fusion->fused_instructions_computation();
+  if (fusion != fused_computation->FusionInstruction()) {
+    return FailedPrecondition(
+        "Instruction of fused computation does not match expected instruction "
+        "%s.",
+        fusion->ToString().c_str());
+  }
+
+  // Fused root instruction and fused parameters must all be owned by the fusion
+  // computation.
+  bool root_owned = false;
+  const std::vector& fused_parameters =
+      fusion->fused_parameters();
+  const HloInstruction* fused_root = fusion->fused_expression_root();
+  std::vector parameter_owned(fused_parameters.size(), false);
+  for (auto& instruction : fused_computation->instructions()) {
+    if (fused_root == instruction.get()) {
+      if (root_owned) {
+        return FailedPrecondition("Root appears more than once in %s.",
+                                  fusion->ToString().c_str());
+      }
+      root_owned = true;
+    }
+    for (int i = 0; i < fused_parameters.size(); ++i) {
+      if (fused_parameters[i] == instruction.get()) {
+        if (parameter_owned[i]) {
+          return FailedPrecondition("Parameter appears more than once in %s.",
+                                    fusion->ToString().c_str());
+        }
+        parameter_owned[i] = true;
+      }
+    }
+  }
+  if (!root_owned) {
+    return FailedPrecondition("Root not found in computation of %s.",
+                              fusion->ToString().c_str());
+  }
+  // Make sure all the parameter_owned entries are set
+  for (int i = 0; i < parameter_owned.size(); i++) {
+    if (!parameter_owned[i]) {
+      return FailedPrecondition("Parameter %d not found in computation of %s.",
+                                i, fusion->ToString().c_str());
+    }
+  }
+
+  // Fused root must have no users.
+  if (fused_root->user_count() != 0) {
+    return FailedPrecondition("Root of %s may not have users.",
+                              fusion->ToString().c_str());
+  }
+
+  // All uses of fused instructions must be in the fusion computation, and every
+  // non-root instruction must have at least one use.
+  for (auto& instruction :
+       fusion->fused_instructions_computation()->instructions()) {
+    if (instruction.get() != fused_root) {
+      if (instruction->user_count() == 0) {
+        return FailedPrecondition(
+            "Non-root instruction %s in %s must have users.",
+            instruction->ToString().c_str(), fusion->ToString().c_str());
+      }
+      for (auto& user : instruction->users()) {
+        if (fused_computation != user->parent()) {
+          return FailedPrecondition(
+              "Non-root instruction %s in %s may not have external users.",
+              instruction->ToString().c_str(), fusion->ToString().c_str());
+        }
+      }
+    }
+  }
+
+  // Fused parameter instructions must be numbered contiguously and match up
+  // (shapes compatible) with their respective operand.
+  CHECK_EQ(fusion->operands().size(), fused_parameters.size());
+  std::vector parameter_numbers(fused_parameters.size(), false);
+  for (auto fused_param : fused_parameters) {
+    int64 param_no = fused_param->parameter_number();
+    if (param_no < 0) {
+      return FailedPrecondition(
+          "Unexpected negative parameter number %lld in %s.", param_no,
+          fusion->ToString().c_str());
+    }
+    if (param_no >= fused_parameters.size()) {
+      return FailedPrecondition(
+          "Unexpected parameter number %lld in %s: higher then number of "
+          "parameters %lu.",
+          param_no, fusion->ToString().c_str(), fused_parameters.size());
+    }
+    if (parameter_numbers[param_no]) {
+      return FailedPrecondition(
+          "Did not expect parameter number %lld more than once in %s.",
+          param_no, fusion->ToString().c_str());
+    }
+    parameter_numbers[param_no] = true;
+    if (!ShapeUtil::Compatible(fused_param->shape(),
+                               fusion->operand(param_no)->shape())) {
+      return FailedPrecondition(
+          "Shape mismatch between parameter number %lld and its operand in %s.",
+          param_no, fusion->ToString().c_str());
+    }
+  }
+  // Make sure all the parameter_numbers entries were seen
+  for (int i = 0; i < parameter_numbers.size(); i++) {
+    if (!parameter_numbers[i]) {
+      return FailedPrecondition("Did not see parameter number %d in %s.", i,
+                                fusion->ToString().c_str());
+    }
+  }
+
+  // TODO(b/65423525): We'd like to check that all operands are distinct.
+  // This is currently disabled due to the invariant being violated by
+  // multi-output fusion.
+  return tensorflow::Status::OK();
+}
+
 StatusOr HloVerifier::Run(HloModule* module) {
   tensorflow::gtl::FlatMap instructions;
   ShapeVerifier shape_verifier(shape_size_fn_);
@@ -298,6 +417,7 @@ StatusOr HloVerifier::Run(HloModule* module) {
     for (const auto& instruction : computation->instructions()) {
       TF_RET_CHECK(instruction->parent() == computation.get());
       if (instruction->opcode() == HloOpcode::kFusion) {
+        TF_RETURN_IF_ERROR(CheckFusionInstruction(instruction.get()));
         TF_RET_CHECK(
             ContainersEqual(instruction->called_computations(),
                             {instruction->fused_instructions_computation()}))
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index bc6800dae5..e35a7f3642 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -34,6 +34,9 @@ class HloVerifier : public HloPassInterface {
   StatusOr Run(HloModule* module) override;
 
  private:
+  // CHECKs various invariants of a fusion instruction.
+  Status CheckFusionInstruction(HloInstruction* fusion) const;
+
   // Returns the size of a Shape in bytes.
   const std::function shape_size_fn_;
 };
diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc
index 607abee33d..064020896e 100644
--- a/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/reduce_precision_insertion_test.cc
@@ -425,7 +425,6 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInHeadOfFusionNode) {
   EXPECT_EQ(computation->root_instruction(), z);
   HloInstruction* y_fused = z->fused_expression_root();
   EXPECT_EQ(y_fused->opcode(), HloOpcode::kCos);
-  z->CheckFusionInstruction();
 
   // This should see that the fusion computation contains a kCos operation,
   // and insert a new reduce-precision node at its input.
@@ -450,7 +449,6 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInHeadOfFusionNode) {
   EXPECT_EQ(computation->root_instruction(), z);
   EXPECT_THAT(z->fused_expression_root(), y_fused);
   EXPECT_THAT(y_fused->operand(0), op::ReducePrecision(op::Parameter()));
-  z->CheckFusionInstruction();
 }
 
 TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInTailOfFusionNode) {
@@ -468,7 +466,6 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInTailOfFusionNode) {
       shape, HloInstruction::FusionKind::kLoop, y));
   EXPECT_IS_OK(computation->ReplaceUsesOfInstruction(y, z));
   EXPECT_IS_OK(computation->RemoveInstruction(y));
-  z->CheckFusionInstruction();
 
   // Confirm expected graph before adding reduce-precision ops.
   EXPECT_THAT(x->users(), UnorderedElementsAre(z));
@@ -498,7 +495,6 @@ TEST_F(ReducePrecisionInsertionTest, OpGetsInsertedInTailOfFusionNode) {
   EXPECT_THAT(x->users(), UnorderedElementsAre(z));
   EXPECT_EQ(computation->root_instruction(), z);
   EXPECT_THAT(z->fused_expression_root(), op::ReducePrecision(y_fused));
-  z->CheckFusionInstruction();
 }
 
 TEST_F(ReducePrecisionInsertionTest, MakeFilterFunctionNoSubstrings) {
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc
index 297bfd93d1..858db8fa0e 100644
--- a/tensorflow/compiler/xla/service/user_computation.cc
+++ b/tensorflow/compiler/xla/service/user_computation.cc
@@ -2471,8 +2471,8 @@ HloInstruction* ComputationLowerer::ImplicitBroadcastToExplicitBroadcast(
       operand->shape().element_type(), AsInt64Slice(output_shape.dimensions()));
   // Do explicit broadcast for scalar.
   if (ShapeUtil::IsScalar(operand->shape())) {
-    return hlo_builder_.AddInstruction(HloInstruction::CreateBroadcast(
-        broadcast_shape, operand, AsInt64Slice(broadcast_shape.dimensions())));
+    return hlo_builder_.AddInstruction(
+        HloInstruction::CreateBroadcast(broadcast_shape, operand, {}));
   }
   // Do explicit broadcast for degenerate broadcast.
   std::vector broadcast_dimensions;
diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
index 606d801c84..22d2b917a1 100644
--- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
+++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc
@@ -67,7 +67,7 @@ class MultiOutputFusionTest : public HloTestBase {
         elem_shape0, HloOpcode::kAdd, param0, const0));
 
     HloInstruction* broadcast = builder.AddInstruction(
-        HloInstruction::CreateBroadcast(elem_shape2, add1, {0, 1}));
+        HloInstruction::CreateBroadcast(elem_shape2, add1, {}));
 
     auto param1 = builder.AddInstruction(
         HloInstruction::CreateParameter(1, elem_shape2, "1"));
diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD
index 84fcc0d014..11e4ea888c 100644
--- a/tensorflow/contrib/BUILD
+++ b/tensorflow/contrib/BUILD
@@ -24,6 +24,7 @@ py_library(
         "//tensorflow/contrib/deprecated:deprecated_py",
         "//tensorflow/contrib/distributions:distributions_py",
         "//tensorflow/contrib/eager/python:tfe",
+        "//tensorflow/contrib/estimator:estimator_py",
         "//tensorflow/contrib/factorization:factorization_py",
         "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py",
         "//tensorflow/contrib/framework:framework_py",
diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py
index d1d0e2823a..5b3f0b3f6e 100644
--- a/tensorflow/contrib/__init__.py
+++ b/tensorflow/contrib/__init__.py
@@ -29,6 +29,7 @@ from tensorflow.contrib import cudnn_rnn
 from tensorflow.contrib import data
 from tensorflow.contrib import deprecated
 from tensorflow.contrib import distributions
+from tensorflow.contrib import estimator
 from tensorflow.contrib import factorization
 from tensorflow.contrib import framework
 from tensorflow.contrib import gan
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake
index 1b706159a3..ce94f718a1 100755
--- a/tensorflow/contrib/cmake/tf_python.cmake
+++ b/tensorflow/contrib/cmake/tf_python.cmake
@@ -218,6 +218,48 @@ add_python_module("tensorflow/python/estimator/inputs/queues")
 add_python_module("tensorflow/python/feature_column")
 add_python_module("tensorflow/python/framework")
 add_python_module("tensorflow/python/grappler")
+add_python_module("tensorflow/python/keras")
+add_python_module("tensorflow/python/keras/activations")
+add_python_module("tensorflow/python/keras/applications")
+add_python_module("tensorflow/python/keras/applications/inception_v3")
+add_python_module("tensorflow/python/keras/applications/mobilenet")
+add_python_module("tensorflow/python/keras/applications/resnet50")
+add_python_module("tensorflow/python/keras/applications/vgg16")
+add_python_module("tensorflow/python/keras/applications/vgg19")
+add_python_module("tensorflow/python/keras/applications/xception")
+add_python_module("tensorflow/python/keras/backend")
+add_python_module("tensorflow/python/keras/callbacks")
+add_python_module("tensorflow/python/keras/constraints")
+add_python_module("tensorflow/python/keras/datasets")
+add_python_module("tensorflow/python/keras/datasets/boston_housing")
+add_python_module("tensorflow/python/keras/datasets/cifar10")
+add_python_module("tensorflow/python/keras/datasets/cifar100")
+add_python_module("tensorflow/python/keras/datasets/imdb")
+add_python_module("tensorflow/python/keras/datasets/mnist")
+add_python_module("tensorflow/python/keras/datasets/reuters")
+add_python_module("tensorflow/python/keras/initializers")
+add_python_module("tensorflow/python/keras/layers")
+add_python_module("tensorflow/python/keras/losses")
+add_python_module("tensorflow/python/keras/metrics")
+add_python_module("tensorflow/python/keras/models")
+add_python_module("tensorflow/python/keras/optimizers")
+add_python_module("tensorflow/python/keras/preprocessing")
+add_python_module("tensorflow/python/keras/preprocessing/image")
+add_python_module("tensorflow/python/keras/preprocessing/sequence")
+add_python_module("tensorflow/python/keras/preprocessing/text")
+add_python_module("tensorflow/python/keras/regularizers")
+add_python_module("tensorflow/python/keras/utils")
+add_python_module("tensorflow/python/keras/wrappers")
+add_python_module("tensorflow/python/keras/wrappers/scikit_learn")
+add_python_module("tensorflow/python/keras/_impl")
+add_python_module("tensorflow/python/keras/_impl/keras")
+add_python_module("tensorflow/python/keras/_impl/keras/applications")
+add_python_module("tensorflow/python/keras/_impl/keras/datasets")
+add_python_module("tensorflow/python/keras/_impl/keras/engine")
+add_python_module("tensorflow/python/keras/_impl/keras/layers")
+add_python_module("tensorflow/python/keras/_impl/keras/preprocessing")
+add_python_module("tensorflow/python/keras/_impl/keras/utils")
+add_python_module("tensorflow/python/keras/_impl/keras/wrappers")
 add_python_module("tensorflow/python/kernel_tests")
 add_python_module("tensorflow/python/kernel_tests/distributions")
 add_python_module("tensorflow/python/layers")
@@ -299,6 +341,9 @@ add_python_module("tensorflow/contrib/distributions/python")
 add_python_module("tensorflow/contrib/distributions/python/kernel_tests")
 add_python_module("tensorflow/contrib/distributions/python/ops")
 add_python_module("tensorflow/contrib/distributions/python/ops/bijectors")
+add_python_module("tensorflow/contrib/estimator")
+add_python_module("tensorflow/contrib/estimator/python")
+add_python_module("tensorflow/contrib/estimator/python/estimator")
 add_python_module("tensorflow/contrib/factorization")
 add_python_module("tensorflow/contrib/factorization/examples")
 add_python_module("tensorflow/contrib/factorization/kernels")
diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake
index eb02f20457..9dff888155 100644
--- a/tensorflow/contrib/cmake/tf_tests.cmake
+++ b/tensorflow/contrib/cmake/tf_tests.cmake
@@ -142,6 +142,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
     "${tensorflow_source_dir}/tensorflow/python/debug/cli/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/debug/lib/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/debug/wrappers/*_test.py"
+    "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/kernel_tests/*.py"
     "${tensorflow_source_dir}/tensorflow/python/meta_graph_transform/*_test.py"
     "${tensorflow_source_dir}/tensorflow/python/profiler/*_test.py"
@@ -246,6 +247,7 @@ if (tensorflow_BUILD_PYTHON_TESTS)
       # Broken tensorboard test due to cmake issues.
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py"
       "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/iterator_ops_cluster_test.py"  # Needs portpicker
+      "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py"  # b/65430561
       # tensor_forest tests (also note that we exclude the hybrid tests for now)
       "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/python/kernel_tests/count_extremely_random_stats_op_test.py"  # Results in wrong order.
       "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/python/kernel_tests/sample_inputs_op_test.py"  # Results in wrong order.
diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
index bc4fd10cac..f6eeb01675 100644
--- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
+++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
@@ -693,6 +693,7 @@ _cudnn_rnn_common_doc_string = """
   canonical format.
 
   This is a typical use case:
+
     * The user creates a CudnnRNN model.
     * The user query that parameter buffer size.
     * The user creates a variable of that size that serves as the parameter
diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD
index 7b916d82c1..c417650a96 100644
--- a/tensorflow/contrib/data/BUILD
+++ b/tensorflow/contrib/data/BUILD
@@ -10,6 +10,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         "//tensorflow/contrib/data/python/ops:dataset_ops",
+        "//tensorflow/contrib/data/python/ops:sloppy_ops",
         "//tensorflow/python:util",
     ],
 )
diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py
index 1c0a5288f7..c74e1369d5 100644
--- a/tensorflow/contrib/data/__init__.py
+++ b/tensorflow/contrib/data/__init__.py
@@ -23,6 +23,8 @@
 @@read_batch_features
 @@rejection_resample
 @@group_by_window
+@@sloppy_interleave
+@@sloppy_map
 """
 
 from __future__ import absolute_import
@@ -38,6 +40,7 @@ from tensorflow.contrib.data.python.ops.dataset_ops import read_batch_features
 from tensorflow.contrib.data.python.ops.dataset_ops import rejection_resample
 from tensorflow.contrib.data.python.ops.dataset_ops import TextLineDataset
 from tensorflow.contrib.data.python.ops.dataset_ops import TFRecordDataset
+from tensorflow.contrib.data.python.ops.sloppy_ops import sloppy_interleave
 # pylint: enable=unused-import
 
 from tensorflow.python.util.all_util import remove_undocumented
diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD
index fb2740ffef..2f93c34502 100644
--- a/tensorflow/contrib/data/python/kernel_tests/BUILD
+++ b/tensorflow/contrib/data/python/kernel_tests/BUILD
@@ -146,6 +146,25 @@ py_test(
     ],
 )
 
+py_test(
+    name = "sloppy_transformation_dataset_op_test",
+    size = "small",
+    srcs = ["sloppy_transformation_dataset_op_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/contrib/data/python/ops:dataset_ops",
+        "//tensorflow/contrib/data/python/ops:sloppy_ops",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:training",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "list_files_dataset_op_test",
     size = "small",
@@ -228,7 +247,7 @@ py_test(
     srcs = ["sql_dataset_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        "//tensorflow/contrib/data",
+        "//tensorflow/contrib/data/python/ops:dataset_ops",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:errors",
diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
index d05fbb7d28..4c1496ccf9 100644
--- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
+++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py
@@ -16,6 +16,7 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+from collections import namedtuple
 
 import os
 import threading
@@ -489,8 +490,8 @@ class MapDatasetTest(test.TestCase):
     dataset_tuple = dataset_ops.Dataset.zip((labels, images))
 
     # convert dataset of tuples to dataset of namedtuples
-    Example = namedtuple("Example", ["label", "image"])
-    dataset_namedtuple = dataset_tuple.map(Example)
+    example = namedtuple("Example", ["label", "image"])
+    dataset_namedtuple = dataset_tuple.map(example)
 
     def preprocess_tuple(label, image):
       image = 2 * image
diff --git a/tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py
new file mode 100644
index 0000000000..f9198bacfb
--- /dev/null
+++ b/tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py
@@ -0,0 +1,475 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for the experimental input pipeline ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import itertools
+import math
+import threading
+import time
+
+from six.moves import zip_longest
+
+from tensorflow.contrib.data.python.ops import dataset_ops
+from tensorflow.contrib.data.python.ops import sloppy_ops
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import script_ops
+from tensorflow.python.platform import test
+
+
+class SloppyInterleaveDatasetTest(test.TestCase):
+
+  def setUp(self):
+    self.input_values = array_ops.placeholder(dtypes.int64, shape=[None])
+    self.cycle_length = array_ops.placeholder(dtypes.int64, shape=[])
+    self.block_length = array_ops.placeholder(dtypes.int64, shape=[])
+
+    self.repeat_count = 2
+
+    # Set up threading events used to sequence when items are produced that
+    # are subsequently interleaved. These events allow us to deterministically
+    # simulate slowdowns and force sloppiness.
+    self.read_coordination_events = {}
+    self.write_coordination_events = {}
+    # input values [4, 5, 6] are the common case for the tests; set defaults
+    for i in range(4, 7):
+      self.read_coordination_events[i] = threading.Semaphore(0)
+      self.write_coordination_events[i] = threading.Event()
+
+    def map_py_fn(x):
+      self.write_coordination_events[x].wait()
+      self.write_coordination_events[x].clear()
+      self.read_coordination_events[x].release()
+      return x * x
+
+    def map_fn(x):
+      return script_ops.py_func(map_py_fn, [x], x.dtype)
+
+    def interleave_fn(x):
+      dataset = dataset_ops.Dataset.from_tensors(x)
+      dataset = dataset.repeat(x)
+      return dataset.map(map_fn)
+
+    self.dataset = (dataset_ops.Dataset.from_tensor_slices(self.input_values)
+                    .repeat(self.repeat_count).apply(
+                        sloppy_ops.sloppy_interleave,
+                        args=(interleave_fn, self.cycle_length,
+                              self.block_length)))
+    self.iterator = self.dataset.make_initializable_iterator()
+    self.init_op = self.iterator.initializer
+    self.next_element = self.iterator.get_next()
+
+  def _interleave(self, lists, cycle_length, block_length):
+    """Python implementation of interleave used for testing."""
+    num_open = 0
+
+    # `all_iterators` acts as a queue of iterators over each element of `lists`.
+    all_iterators = [iter(l) for l in lists]
+
+    # `open_iterators` are the iterators whose elements are currently being
+    # interleaved.
+    open_iterators = []
+    for i in range(cycle_length):
+      if all_iterators:
+        open_iterators.append(all_iterators.pop(0))
+        num_open += 1
+      else:
+        open_iterators.append(None)
+
+    while num_open or all_iterators:
+      for i in range(cycle_length):
+        if open_iterators[i] is None:
+          if all_iterators:
+            open_iterators[i] = all_iterators.pop(0)
+            num_open += 1
+          else:
+            continue
+        for _ in range(block_length):
+          try:
+            yield next(open_iterators[i])
+          except StopIteration:
+            open_iterators[i] = None
+            num_open -= 1
+            break
+
+  def testPythonImplementation(self):
+    input_lists = [[4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6],
+                   [4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6, 6]]
+
+    # Cycle length 1 acts like `Dataset.flat_map()`.
+    expected_elements = itertools.chain(*input_lists)
+    for expected, produced in zip(expected_elements,
+                                  self._interleave(input_lists, 1, 1)):
+      self.assertEqual(expected, produced)
+
+    # Cycle length > 1.
+    expected_elements = [
+        4, 5, 4, 5, 4, 5, 4, 5, 5, 6, 6, 4, 6, 4, 6, 4, 6, 4, 6, 5, 6, 5, 6, 5,
+        6, 5, 6, 5, 6, 6
+    ]
+    for index, (expected, produced) in enumerate(
+        zip_longest(expected_elements, self._interleave(input_lists, 2, 1))):
+      self.assertEqual(expected, produced, "Values differ at %s. %s != %s" %
+                       (index, expected, produced))
+
+  def testPythonImplementationBlockLength(self):
+    input_lists = [[4] * 4, [5] * 5, [6] * 6] * 2
+    expected_elements = [
+        4, 4, 5, 5, 4, 4, 5, 5, 5, 6, 6, 4, 4, 6, 6, 4, 4, 6, 6, 5, 5, 6, 6, 5,
+        5, 6, 6, 5, 6, 6
+    ]
+    for index, (expected, produced) in enumerate(
+        zip_longest(expected_elements, self._interleave(input_lists, 2, 2))):
+      self.assertEqual(expected, produced, "Values differ at %s. %s != %s" %
+                       (index, expected, produced))
+
+  def testPythonImplementationEmptyLists(self):
+    input_lists = [[4, 4, 4, 4], [], [6, 6, 6, 6, 6, 6], [4, 4, 4, 4], [],
+                   [6, 6, 6, 6, 6, 6]]
+
+    expected_elements = [
+        4, 4, 6, 4, 6, 4, 6, 6, 4, 6, 4, 6, 4, 4, 6, 6, 6, 6, 6, 6
+    ]
+    for index, (expected, produced) in enumerate(
+        zip_longest(expected_elements, self._interleave(input_lists, 2, 1))):
+      self.assertEqual(expected, produced, "Values differ at %s. %s != %s" %
+                       (index, expected, produced))
+
+  def _clear_coordination_events(self):
+    for i in range(4, 7):
+      self.read_coordination_events[i] = threading.Semaphore(0)
+      self.write_coordination_events[i].clear()
+
+  def _allow_all_map_threads(self):
+    for i in range(4, 7):
+      self.write_coordination_events[i].set()
+
+  def testSingleThreaded(self):
+    # cycle_length=1,block_length=1 acts like `Dataset.interleave()` and
+    # `Dataset.flat_map()` and is single-threaded. No synchronization required.
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 1,
+              self.block_length: 1
+          })
+
+      for expected_element in self._interleave(
+          [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 1, 1):
+        self.write_coordination_events[expected_element].set()
+        self.assertEqual(expected_element * expected_element,
+                         sess.run(self.next_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testTwoThreadsNoContention(self):
+    # num_threads > 1.
+    # Explicit coordination should result in `Dataset.interleave()` behavior
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      done_first_event = False
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 2,
+              self.block_length: 1
+          })
+      for i, expected_element in enumerate(
+          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                           1)):
+        self.write_coordination_events[expected_element].set()
+        if done_first_event:  # First event starts the worker threads.
+          self.read_coordination_events[expected_element].acquire()
+        actual_element = sess.run(self.next_element)
+        if not done_first_event:
+          self.read_coordination_events[expected_element].acquire()
+          done_first_event = True
+        self.assertEqual(expected_element * expected_element, actual_element,
+                         "At index %s: %s expected, got: %s" %
+                         (i, expected_element, actual_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testTwoThreadsNoContentionWithRaces(self):
+    """Tests where all the workers race in producing elements.
+
+    Note: this is in contrast with the prevous test which carefully sequences
+    the execution of the map functions.
+    """
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      done_first_event = False
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 2,
+              self.block_length: 1
+          })
+      for i, expected_element in enumerate(
+          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                           1)):
+        if done_first_event:  # First event starts the worker threads.
+          self._allow_all_map_threads()
+          self.read_coordination_events[expected_element].acquire()
+        else:
+          self.write_coordination_events[expected_element].set()
+        time.sleep(0.01)  # Sleep to consistently "avoid" the race condition.
+        actual_element = sess.run(self.next_element)
+        if not done_first_event:
+          done_first_event = True
+          self.assertTrue(
+              self.read_coordination_events[expected_element].acquire(False))
+        self.assertEqual(expected_element * expected_element, actual_element,
+                         "At index %s: %s expected, got: %s" %
+                         (i, expected_element, actual_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testTwoThreadsNoContentionBlockLength(self):
+    # num_threads > 1.
+    # Explicit coordination should result in `Dataset.interleave()` behavior
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      done_first_event = False
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 2,
+              self.block_length: 2
+          })
+      for i, expected_element in enumerate(
+          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                           2)):
+        self.write_coordination_events[expected_element].set()
+        if done_first_event:  # First event starts the worker threads.
+          self.read_coordination_events[expected_element].acquire()
+        actual_element = sess.run(self.next_element)
+        if not done_first_event:
+          done_first_event = True
+          self.read_coordination_events[expected_element].acquire()
+        self.assertEqual(expected_element * expected_element, actual_element,
+                         "At index %s: %s expected, got: %s" %
+                         (i, expected_element, actual_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testTwoThreadsNoContentionWithRacesAndBlocking(self):
+    """Tests where all the workers race in producing elements.
+
+    Note: this is in contrast with the prevous test which carefully sequences
+    the execution of the map functions.
+    """
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      done_first_event = False
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 2,
+              self.block_length: 2
+          })
+      for i, expected_element in enumerate(
+          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                           2)):
+        if done_first_event:  # First event starts the worker threads.
+          self._allow_all_map_threads()
+          self.read_coordination_events[expected_element].acquire()
+        else:
+          self.write_coordination_events[expected_element].set()
+        time.sleep(0.01)  # Sleep to consistently "avoid" the race condition.
+        actual_element = sess.run(self.next_element)
+        if not done_first_event:
+          done_first_event = True
+          self.assertTrue(
+              self.read_coordination_events[expected_element].acquire(False))
+        self.assertEqual(expected_element * expected_element, actual_element,
+                         "At index %s: %s expected, got: %s" %
+                         (i, expected_element, actual_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testEmptyInput(self):
+    with self.test_session() as sess:
+      # Empty input.
+      self._clear_coordination_events()
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [],
+              self.cycle_length: 2,
+              self.block_length: 3
+          })
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testNonEmptyInputIntoEmptyOutputs(self):
+    # Non-empty input leading to empty output.
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [0, 0, 0],
+              self.cycle_length: 2,
+              self.block_length: 3
+          })
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testPartiallyEmptyOutputs(self):
+    # Mixture of non-empty and empty interleaved datasets.
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      done_first_event = False
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 0, 6],
+              self.cycle_length: 2,
+              self.block_length: 1
+          })
+      for i, expected_element in enumerate(
+          self._interleave([[4] * 4, [], [6] * 6] * self.repeat_count, 2, 1)):
+        self.write_coordination_events[expected_element].set()
+        if done_first_event:  # First event starts the worker threads
+          self.read_coordination_events[expected_element].acquire()
+        actual_element = sess.run(self.next_element)
+        if not done_first_event:
+          done_first_event = True
+          self.read_coordination_events[expected_element].acquire()
+        self.assertEqual(expected_element * expected_element, actual_element,
+                         "At index %s: %s expected, got: %s" %
+                         (i, expected_element, actual_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testDelayedOutput(self):
+    # Explicitly control the sequence of events to ensure we correctly avoid
+    # head-of-line blocking.
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 2,
+              self.block_length: 1
+          })
+
+      mis_ordering = [
+          4, 4, 5, 4, 5, 5, 4, 5, 6, 6, 6, 5, 4, 4, 6, 6, 4, 4, 6, 5, 6, 6, 6,
+          6, 5, 5, 5, 5, 6, 6
+      ]
+      for element in mis_ordering:
+        self.write_coordination_events[element].set()
+        self.assertEqual(element * element, sess.run(self.next_element))
+        self.assertTrue(self.read_coordination_events[element].acquire(False))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testBlockLengthWithContention(self):
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      done_first_event = False
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 2,
+              self.block_length: 3
+          })
+      # Test against a generating sequence that differs from the uncontended
+      # case, in order to prove sloppy correctness.
+      for i, expected_element in enumerate(
+          self._interleave(
+              [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count,
+              cycle_length=2,
+              block_length=2)):
+        self.write_coordination_events[expected_element].set()
+        if done_first_event:  # First event starts the worker threads.
+          self.read_coordination_events[expected_element].acquire()
+        actual_element = sess.run(self.next_element)
+        if not done_first_event:
+          self.read_coordination_events[expected_element].acquire()
+          done_first_event = True
+        self.assertEqual(expected_element * expected_element, actual_element,
+                         "At index %s: %s expected, got: %s" %
+                         (i, expected_element, actual_element))
+      with self.assertRaises(errors.OutOfRangeError):
+        sess.run(self.next_element)
+
+  def testEarlyExit(self):
+    # Exiting without consuming all input should not block
+    with self.test_session() as sess:
+      self._clear_coordination_events()
+      sess.run(
+          self.init_op,
+          feed_dict={
+              self.input_values: [4, 5, 6],
+              self.cycle_length: 3,
+              self.block_length: 2
+          })
+      for i in range(4, 7):
+        self.write_coordination_events[i].set()
+      elem = sess.run(self.next_element)  # Start all workers
+      # Allow the one successful worker to progress beyond the py_func again.
+      elem = int(math.sqrt(elem))
+      self.write_coordination_events[elem].set()
+      self.read_coordination_events[elem].acquire()
+      # Allow the prefetch to succeed
+      for i in range(4, 7):
+        self.read_coordination_events[i].acquire()
+        self.write_coordination_events[i].set()
+
+  def testTooManyReaders(self):
+
+    def interleave_fn(x):
+      dataset = dataset_ops.Dataset.from_tensors(x)
+      dataset = dataset.repeat(math_ops.cast(x, dtype=dtypes.int64))
+      return dataset
+
+    dataset = dataset_ops.Dataset.from_tensor_slices([4, 5, 6])
+    dataset = dataset.repeat(self.repeat_count)
+    dataset = dataset.apply(
+        sloppy_ops.sloppy_interleave,
+        args=(interleave_fn,),
+        kwargs={"cycle_length": 16,
+                "block_length": 2})
+    iterator = dataset.make_one_shot_iterator()
+
+    with self.test_session() as sess:
+      output_values = []
+      for _ in range(30):
+        output_values.append(sess.run(iterator.get_next()))
+
+    expected_values = self._interleave(
+        [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 1, 2)
+    self.assertItemsEqual(output_values, expected_values)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD
index 8afd122d82..94969c1c70 100644
--- a/tensorflow/contrib/data/python/ops/BUILD
+++ b/tensorflow/contrib/data/python/ops/BUILD
@@ -32,6 +32,21 @@ py_library(
     ],
 )
 
+py_library(
+    name = "sloppy_ops",
+    srcs = ["sloppy_ops.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":dataset_ops",
+        "//tensorflow/contrib/data/python/framework:function",
+        "//tensorflow/contrib/data/python/util:nest",
+        "//tensorflow/python:dataset_ops_gen",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:platform",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/data/python/ops/sloppy_ops.py b/tensorflow/contrib/data/python/ops/sloppy_ops.py
new file mode 100644
index 0000000000..010bd31161
--- /dev/null
+++ b/tensorflow/contrib/data/python/ops/sloppy_ops.py
@@ -0,0 +1,120 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Non-deterministic dataset transformations."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.data.python.framework import function
+from tensorflow.contrib.data.python.ops import dataset_ops
+from tensorflow.contrib.data.python.util import nest
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import gen_dataset_ops
+
+
+class SloppyInterleaveDataset(dataset_ops.Dataset):
+  """A `Dataset` that maps a function over its input and flattens the result."""
+
+  def __init__(self, input_dataset, map_func, cycle_length, block_length):
+    """See `tf.contrib.data.sloppy_interleave()` for details."""
+    super(SloppyInterleaveDataset, self).__init__()
+    self._input_dataset = input_dataset
+
+    @function.Defun(*nest.flatten(input_dataset.output_types))
+    def tf_map_func(*args):
+      """A wrapper for Defun that facilitates shape inference."""
+      # Pass in shape information from the input_dataset.
+      for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)):
+        arg.set_shape(shape)
+
+      nested_args = nest.pack_sequence_as(input_dataset.output_types, args)
+
+      if nest.is_sequence(nested_args):
+        dataset = map_func(*nested_args)
+      else:
+        dataset = map_func(nested_args)
+
+      if not isinstance(dataset, dataset_ops.Dataset):
+        raise TypeError("`map_func` must return a `Dataset` object.")
+
+      self._output_types = dataset.output_types
+      self._output_shapes = dataset.output_shapes
+
+      return dataset.make_dataset_resource()
+
+    self._map_func = tf_map_func
+    self._map_func.add_to_graph(ops.get_default_graph())
+
+    self._cycle_length = ops.convert_to_tensor(
+        cycle_length, dtype=dtypes.int64, name="cycle_length")
+    self._block_length = ops.convert_to_tensor(
+        block_length, dtype=dtypes.int64, name="block_length")
+
+  def make_dataset_resource(self):
+    return gen_dataset_ops.sloppy_interleave_dataset(
+        self._input_dataset.make_dataset_resource(),
+        self._map_func.captured_inputs,
+        self._cycle_length,
+        self._block_length,
+        f=self._map_func,
+        output_types=nest.flatten(self.output_types),
+        output_shapes=nest.flatten(self.output_shapes))
+
+  @property
+  def output_shapes(self):
+    return self._output_shapes
+
+  @property
+  def output_types(self):
+    return self._output_types
+
+
+def sloppy_interleave(dataset, map_func, cycle_length, block_length):
+  """Maps `map_func` across `dataset`, and interleaves the results.
+
+  The resulting dataset is almost identical to `interleave`. The key
+  difference being that if retrieving a value from a given output iterator would
+  cause `get_next` to block, that iterator will be skipped, and consumed
+  when next available. If consuming from all iterators would cause the
+  `get_next` call to block, the `get_next` call blocks until the first value is
+  available.
+
+  If the underlying datasets produce elements as fast as they are consumed, the
+  `sloppy_interleave` dataset behaves identically to the `interleave` dataset.
+  However, if an underlying dataset would block the consumer, the
+  `sloppy_interleave` dataset can violate to the round-robin order (respected by
+  the `interleave` dataset), producing an element from a different underlying
+  dataset instead.
+
+  WARNING: The order of elements in the resulting dataset is not
+  deterministic. Use `Dataset.interleave()` if you want the elements to have a
+  deterministic order.
+
+  Args:
+    dataset: A `Dataset` that produces elements to feed to `map_func`.
+    map_func: A function mapping a nested structure of tensors (having shapes
+      and types defined by `self.output_shapes` and `self.output_types`) to a
+      `Dataset`.
+    cycle_length: The number of threads to interleave from in parallel.
+    block_length: The number of consecutive elements to pull from a thread
+      before advancing to the next thread. Note: sloppy_interleave will
+      skip the remainder of elements in the block_length in order to avoid
+      blocking.
+
+  Returns:
+    A `Dataset`.
+  """
+  return SloppyInterleaveDataset(dataset, map_func, cycle_length, block_length)
diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index e29314099d..1b831f8afb 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -2,11 +2,14 @@ licenses(["notice"])  # Apache 2.0
 
 package(default_visibility = ["//tensorflow:internal"])
 
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+
 py_library(
     name = "tfe",
     srcs = ["tfe.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":saver",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:util",
         "//tensorflow/python/eager:backprop",
@@ -18,6 +21,28 @@ py_library(
     ],
 )
 
+py_library(
+    name = "saver",
+    srcs = ["saver.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:training",
+    ],
+)
+
+cuda_py_test(
+    name = "saver_test",
+    srcs = ["saver_test.py"],
+    additional_deps = [
+        ":saver",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:platform_test",
+        "//tensorflow/python:variables",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/eager/python/saver.py b/tensorflow/contrib/eager/python/saver.py
new file mode 100644
index 0000000000..12c902a4b6
--- /dev/null
+++ b/tensorflow/contrib/eager/python/saver.py
@@ -0,0 +1,122 @@
+"""Saver for eager mode TensorFlow."""
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+
+from tensorflow.python.framework import errors
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import saver as _saver
+
+
+def _init_from_checkpoint(self, *args, **kwargs):
+  """Overrides default init by loading value from checkpoint."""
+  self.old_init(*args, **kwargs)
+  # pylint: disable=protected-access
+  if self._shared_name not in self.ckpt_var_cache:
+    raise errors.NotFoundError(None, None,
+                               "%s not found in checkpoint" % self._shared_name)
+
+  val = self.ckpt_var_cache[self._shared_name]
+  if val is not None:
+    self.assign(self.ckpt_var_cache[self._shared_name])
+    # Avoid assigning for the second time.
+    self.ckpt_var_cache[self._shared_name] = None
+  # pylint: enable=protected-access
+
+
+class Saver(object):
+  """A simple tf.train.Saver adapter for eager mode.
+
+    save and restore API are similar to the tf.train.Saver, except that
+    session is not needed.
+
+    restore_on_create is eager mode's way to reload checkpoint value during
+    the execution. (unlike graph mode's reload before run).
+
+  Args:
+    var_list: See tf.train.Saver. Works the same for save/restore. Ignored
+        by restore_on_create.
+  """
+
+  def __init__(self, var_list=None):
+    self._saver = _saver.Saver(var_list=var_list)
+
+  def save(self, save_path, global_step=None):
+    """Saves variables.
+
+    Args:
+      save_path: See save method in tf.train.Saver.
+      global_step: See save method in tf.train.Saver.
+
+    Returns:
+      See save method in tf.train.Saver.
+    """
+    return self._saver.save(None, save_path, global_step=global_step)
+
+  def restore(self, save_path):
+    """Restores previously saved variables.
+
+    Args:
+      save_path: See restore method in tf.train.Saver.
+    """
+    self._saver.restore(None, save_path)
+
+  @contextlib.contextmanager
+  def maybe_restore_on_create(self, save_path):
+    """ContextManager that restores variables on creation.
+
+      When save_path is None (e.g. No checkpoint), does nothing.
+      Otherwise, it preloads all values from checkpoint. When the
+      corresponding variable is first created, it assigns the checkpoint
+      value to the variable.
+
+    Args:
+      save_path: Same as save_path of retore. If None, do not restore.
+
+    Yields:
+      Nothing.
+
+    Raises:
+      NotFoundError: If the variable is not found in checkpoint.
+    """
+    if save_path:
+      ckpt_var_cache = dict()
+      reader = checkpoint_utils.load_checkpoint(save_path)
+      for k, _ in checkpoint_utils.list_variables(save_path):
+        ckpt_var_cache[k] = reader.get_tensor(k)
+
+      old_init = getattr(
+          resource_variable_ops.ResourceVariable, "_init_from_args", None)
+      assert old_init, "ResourceVariable misses _init_from_args method."
+      setattr(resource_variable_ops.ResourceVariable, "_init_from_args",
+              _init_from_checkpoint)
+      setattr(resource_variable_ops.ResourceVariable, "old_init", old_init)
+      setattr(resource_variable_ops.ResourceVariable, "ckpt_var_cache",
+              ckpt_var_cache)
+    try:
+      yield
+    except Exception as e:
+      raise e
+    finally:
+      if save_path:
+        setattr(resource_variable_ops.ResourceVariable, "_init_from_args",
+                old_init)
+        setattr(resource_variable_ops.ResourceVariable, "old_init", None)
+        setattr(resource_variable_ops.ResourceVariable, "ckpt_var_cache", None)
diff --git a/tensorflow/contrib/eager/python/saver_test.py b/tensorflow/contrib/eager/python/saver_test.py
new file mode 100644
index 0000000000..b8ff566ec2
--- /dev/null
+++ b/tensorflow/contrib/eager/python/saver_test.py
@@ -0,0 +1,88 @@
+"""Tests for eager mode Saver."""
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from tensorflow.contrib.eager.python import saver as _saver
+from tensorflow.python.eager import context
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.platform import test
+
+
+class SaverTest(test.TestCase):
+
+  def testBasics(self):
+    with context.eager_mode():
+      v1 = resource_variable_ops.ResourceVariable(1.0, name='v1')
+      def model():
+        return array_ops.constant(2.0) * v1
+
+      ckpt_prefix = os.path.join(test.get_temp_dir(), 'ckpt')
+
+      _ = model()
+      saver = _saver.Saver()
+      saver.save(ckpt_prefix)
+      v1.assign(2.0)
+      self.assertEqual(v1.read_value().numpy(), 2.0)
+
+      saver.restore(ckpt_prefix)
+      self.assertEqual(v1.read_value().numpy(), 1.0)
+
+  def testRestoreOnCreate(self):
+    with context.eager_mode():
+      def model(init_val):
+        v1 = resource_variable_ops.ResourceVariable(init_val, name='v1')
+        return array_ops.constant(1.0) * v1
+
+      ckpt_prefix = os.path.join(test.get_temp_dir(), 'ckpt')
+      _ = model(1.0)
+      saver = _saver.Saver()
+      saver.save(ckpt_prefix)
+
+      with ops.Graph().as_default():
+        saver = _saver.Saver()
+        with saver.maybe_restore_on_create(ckpt_prefix):
+          # Value is from checkpoint, but not from argument.
+          ret = model(2.0)
+          self.assertEqual(ret.numpy(), 1.0)
+          # Create it a second time won't re-assign the checkpoint value.
+          v1_2 = resource_variable_ops.ResourceVariable(3.0, name='v1')
+          self.assertEqual(v1_2.read_value().numpy(), 3.0)
+
+  def testRestoreNotFound(self):
+    with context.eager_mode():
+      def model(v):
+        return array_ops.constant(1.0) * v
+
+      ckpt_prefix = os.path.join(test.get_temp_dir(), 'ckpt')
+      _ = model(resource_variable_ops.ResourceVariable(1.0, name='v1'))
+      saver = _saver.Saver()
+      saver.save(ckpt_prefix)
+
+      with self.assertRaisesRegexp(errors.NotFoundError,
+                                   'v2 not found in checkpoint'):
+        with saver.maybe_restore_on_create(ckpt_prefix):
+          _ = model(resource_variable_ops.ResourceVariable(1.0, name='v2'))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py
index aa0276dfd9..2c7494a0a8 100644
--- a/tensorflow/contrib/eager/python/tfe.py
+++ b/tensorflow/contrib/eager/python/tfe.py
@@ -42,6 +42,8 @@ To use, at program startup, call `tfe.enable_eager_execution()`.
 @@inf_nan_callback
 @@nan_callback
 @@seterr
+
+@@Saver
 """
 
 from __future__ import absolute_import
@@ -51,6 +53,7 @@ from __future__ import print_function
 
 # pylint:disable=g-bad-import-order,g-import-not-at-top,unused-import
 #
+from tensorflow.contrib.eager.python.saver import Saver
 from tensorflow.python.util.all_util import remove_undocumented
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager.custom_gradient import custom_gradient
diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD
new file mode 100644
index 0000000000..46cdf086dd
--- /dev/null
+++ b/tensorflow/contrib/estimator/BUILD
@@ -0,0 +1,61 @@
+package(
+    default_visibility = [
+        "//tensorflow:internal",
+    ],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
+
+py_library(
+    name = "estimator_py",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":extenders",
+    ],
+)
+
+py_library(
+    name = "extenders",
+    srcs = [
+        "python/estimator/extenders.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:util",
+        "//tensorflow/python/estimator",
+        "//tensorflow/python/estimator:model_fn",
+        "//tensorflow/python/estimator:util",
+    ],
+)
+
+py_test(
+    name = "extenders_test",
+    size = "small",
+    srcs = ["python/estimator/extenders_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":extenders",
+        "//tensorflow/contrib/data/python/ops:dataset_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:metrics",
+        "//tensorflow/python/estimator",
+        "//tensorflow/python/estimator:linear",
+        "//tensorflow/python/feature_column",
+        "//third_party/py/numpy",
+    ],
+)
diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py
new file mode 100644
index 0000000000..9180a3acc3
--- /dev/null
+++ b/tensorflow/contrib/estimator/__init__.py
@@ -0,0 +1,29 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Experimental utilities re:tf.estimator.*."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=unused-import,line-too-long,wildcard-import
+from tensorflow.contrib.estimator.python.estimator.extenders import *
+
+from tensorflow.python.util.all_util import remove_undocumented
+# pylint: enable=unused-import,line-too-long,wildcard-import
+
+_allowed_symbols = ['add_metrics']
+
+remove_undocumented(__name__, allowed_exception_list=_allowed_symbols)
diff --git a/tensorflow/contrib/estimator/python/estimator/extenders.py b/tensorflow/contrib/estimator/python/estimator/extenders.py
new file mode 100644
index 0000000000..45dd9ef70d
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/extenders.py
@@ -0,0 +1,124 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Extenders of tf.estimator.Estimator."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.estimator import estimator as estimator_lib
+from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.estimator import util as estimator_util
+from tensorflow.python.util import tf_inspect
+
+_VALID_METRIC_FN_ARGS = set(['features', 'labels', 'predictions', 'config'])
+
+
+def add_metrics(estimator, metric_fn):
+  """Creates new ${tf.estimator.Esitmator} which has given metrics.
+
+  Example:
+
+  ```python
+    def my_auc(labels, predictions):
+      return {'auc': tf.metrics.auc(labels, predictions['logistic'])}
+
+    estimator = tf.estimator.DNNClassifier(...)
+    estimator = tf.contrib.estimator.add_metrics(estimator, my_auc)
+    estimator.train(...)
+    estimator.evaluate(...)
+  ```
+  Example usage of custom metric which uses features:
+
+  ```python
+    def my_auc(features, labels, predictions):
+      return {'auc': tf.metrics.auc(
+        labels, predictions['logistic'], weights=features['weight'])}
+
+    estimator = tf.estimator.DNNClassifier(...)
+    estimator = tf.contrib.estimator.add_metrics(estimator, my_auc)
+    estimator.train(...)
+    estimator.evaluate(...)
+  ```
+
+  Args:
+    estimator: A ${tf.estimator.Esitmator} object.
+    metric_fn: A function which should obey the following signature:
+      - Args: can only have following four arguments in any order:
+        * predictions: Predictions `Tensor` or dict of `Tensor` created by given
+          `estimator`.
+        * features: Input `dict` of `Tensor` objects created by `input_fn` which
+          is given to `estimator.evaluate` as an argument.
+        * labels:  Labels `Tensor` or dict of `Tensor` created by `input_fn`
+          which is given to `estimator.evaluate` as an argument.
+        * config: config attribute of the `estimator`.
+       - Returns:
+         Dict of metric results keyed by name. Final metrics are a union of this
+         and `estimator's` existing metrics. If there is a name conflict between
+         this and `estimator`s existing metrics, this will override the existing
+         one. The values of the dict are the results of calling a metric
+         function, namely a `(metric_tensor, update_op)` tuple.
+
+  Returns:
+      A new ${tf.estimator.Estimator} which has a union of original metrics with
+        given ones.
+  """
+  _verify_metric_fn_args(metric_fn)
+
+  def new_model_fn(features, labels, mode):
+    spec = _get_model_fn(estimator)(features, labels, mode)
+    if mode != model_fn_lib.ModeKeys.EVAL:
+      return spec
+    new_metrics = _call_metric_fn(metric_fn, features, labels, spec.predictions,
+                                  estimator.config)
+    all_metrics = spec.eval_metric_ops or {}
+    all_metrics.update(new_metrics)
+    return spec._replace(eval_metric_ops=all_metrics)
+
+  return estimator_lib.Estimator(
+      model_fn=new_model_fn,
+      model_dir=estimator.model_dir,
+      config=estimator.config)
+
+
+# TODO(ispir): Move this to tf.estimator.Estimator.
+def _get_model_fn(estimator):
+  return estimator._call_model_fn  # pylint: disable=protected-access
+
+
+def _verify_metric_fn_args(metric_fn):
+  args = set(estimator_util.fn_args(metric_fn))
+  if tf_inspect.ismethod(metric_fn):
+    if 'self' in args:
+      args.remove('self')
+  invalid_args = list(args - _VALID_METRIC_FN_ARGS)
+  if invalid_args:
+    raise ValueError('metric_fn (%s) has following not expected args: %s' %
+                     (metric_fn, invalid_args))
+
+
+def _call_metric_fn(metric_fn, features, labels, predictions, config):
+  """Calls metric fn with proper arguments."""
+  metric_fn_args = estimator_util.fn_args(metric_fn)
+  kwargs = {}
+  if 'features' in metric_fn_args:
+    kwargs['features'] = features
+  if 'labels' in metric_fn_args:
+    kwargs['labels'] = labels
+  if 'predictions' in metric_fn_args:
+    kwargs['predictions'] = predictions
+  if 'config' in metric_fn_args:
+    kwargs['config'] = config
+  return metric_fn(**kwargs)
diff --git a/tensorflow/contrib/estimator/python/estimator/extenders_test.py b/tensorflow/contrib/estimator/python/estimator/extenders_test.py
new file mode 100644
index 0000000000..422c16d24e
--- /dev/null
+++ b/tensorflow/contrib/estimator/python/estimator/extenders_test.py
@@ -0,0 +1,135 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""extenders tests."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.data.python.ops import dataset_ops
+from tensorflow.contrib.estimator.python.estimator import extenders
+from tensorflow.python.estimator import run_config
+from tensorflow.python.estimator.canned import linear
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import metrics as metrics_lib
+from tensorflow.python.platform import test
+
+
+def get_input_fn(x, y):
+
+  def input_fn():
+    dataset = dataset_ops.Dataset.from_tensor_slices({'x': x, 'y': y})
+    iterator = dataset.make_one_shot_iterator()
+    features = iterator.get_next()
+    labels = features.pop('y')
+    return features, labels
+
+  return input_fn
+
+
+class AddMetricsTest(test.TestCase):
+
+  def test_should_add_metrics(self):
+    input_fn = get_input_fn(
+        x=np.arange(4)[:, None, None], y=np.ones(4)[:, None])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn(features):
+      return {'mean_x': metrics_lib.mean(features['x'])}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+
+    estimator.train(input_fn=input_fn)
+    metrics = estimator.evaluate(input_fn=input_fn)
+    self.assertIn('mean_x', metrics)
+    self.assertEqual(1.5, metrics['mean_x'])
+    # assert that it keeps original estimators metrics
+    self.assertIn('auc', metrics)
+
+  def test_should_error_out_for_not_recognized_args(self):
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn(features, not_recognized):
+      _, _ = features, not_recognized
+      return {}
+
+    with self.assertRaisesRegexp(ValueError, 'not_recognized'):
+      estimator = extenders.add_metrics(estimator, metric_fn)
+
+  def test_all_supported_args(self):
+    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn(features, predictions, labels, config):
+      self.assertIn('x', features)
+      self.assertIsNotNone(labels)
+      self.assertIn('logistic', predictions)
+      self.assertTrue(isinstance(config, run_config.RunConfig))
+      return {}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+
+    estimator.train(input_fn=input_fn)
+    estimator.evaluate(input_fn=input_fn)
+
+  def test_all_supported_args_in_different_order(self):
+    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn(labels, config, features, predictions):
+      self.assertIn('x', features)
+      self.assertIsNotNone(labels)
+      self.assertIn('logistic', predictions)
+      self.assertTrue(isinstance(config, run_config.RunConfig))
+      return {}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+
+    estimator.train(input_fn=input_fn)
+    estimator.evaluate(input_fn=input_fn)
+
+  def test_all_args_are_optional(self):
+    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+
+    def metric_fn():
+      return {'two': metrics_lib.mean(constant_op.constant([2.]))}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+
+    estimator.train(input_fn=input_fn)
+    metrics = estimator.evaluate(input_fn=input_fn)
+    self.assertEqual(2., metrics['two'])
+
+  def test_overrides_existing_metrics(self):
+    input_fn = get_input_fn(x=[[[0.]]], y=[[[1]]])
+    estimator = linear.LinearClassifier([fc.numeric_column('x')])
+    estimator.train(input_fn=input_fn)
+    metrics = estimator.evaluate(input_fn=input_fn)
+    self.assertNotEqual(2., metrics['auc'])
+
+    def metric_fn():
+      return {'auc': metrics_lib.mean(constant_op.constant([2.]))}
+
+    estimator = extenders.add_metrics(estimator, metric_fn)
+    metrics = estimator.evaluate(input_fn=input_fn)
+    self.assertEqual(2., metrics['auc'])
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/fused_conv/BUILD b/tensorflow/contrib/fused_conv/BUILD
index f5d21278db..9b34cf1bdb 100644
--- a/tensorflow/contrib/fused_conv/BUILD
+++ b/tensorflow/contrib/fused_conv/BUILD
@@ -60,12 +60,14 @@ tf_kernel_library(
     srcs = [
         "kernels/fused_conv2d_bias_activation_op.cc",
         "kernels/fused_conv2d_bias_activation_op.h",
+        "kernels/fused_conv_ops_gpu.h",
     ],
     prefix = "fused_conv2d_bias_activation_op",
     deps = [
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_proto_parsing",
+        "//tensorflow/core:stream_executor",
         "//tensorflow/core/kernels:bounds_check_lib",
         "//tensorflow/core/kernels:conv_2d_hdrs",
         "//tensorflow/core/kernels:conv_ops_gpu_hdrs",
@@ -81,6 +83,7 @@ tf_custom_op_library(
     srcs = [
         "kernels/fused_conv2d_bias_activation_op.cc",
         "kernels/fused_conv2d_bias_activation_op.h",
+        "kernels/fused_conv_ops_gpu.h",
         "ops/fused_conv2d_bias_activation_op.cc",
     ],
     deps = [
@@ -94,12 +97,8 @@ tf_custom_op_library(
 )
 
 tf_gen_op_libs(
-    op_lib_names = [
-        "fused_conv2d_bias_activation_op",
-    ],
-    deps = [
-        "//tensorflow/core:lib_proto_parsing",
-    ],
+    op_lib_names = ["fused_conv2d_bias_activation_op"],
+    deps = ["//tensorflow/core:lib_proto_parsing"],
 )
 
 tf_gen_op_wrapper_py(
@@ -109,7 +108,7 @@ tf_gen_op_wrapper_py(
 
 cuda_py_test(
     name = "fused_conv2d_bias_activation_op_test",
-    size = "small",
+    size = "large",
     srcs = ["python/ops/fused_conv2d_bias_activation_op_test.py"],
     additional_deps = [
         ":fused_conv_py",
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index dc0701b234..675ff2be38 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#define EIGEN_USE_THREADS
-
 #if GOOGLE_CUDA
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA
@@ -31,8 +29,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/conv_2d.h"
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/util/padding.h"
-#include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
 
 #if GOOGLE_CUDA
@@ -40,38 +38,84 @@ limitations under the License.
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/util/activation_mode.h"
 #endif  // GOOGLE_CUDA
+
 namespace tensorflow {
 
-typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
-template 
-struct LaunchConvOp;
+template 
+struct RawType {
+  using type = T;
+};
+
+template <>
+struct RawType {
+  using type = int8;
+};
+
+// Template struct to convert int8x4 to int32.
+// (for NCHW_VECT_C with element type int8, we can consider it to be
+// an NCHW layout with element type int32 for operations like padding).
+template 
+struct Int8x4ToInt32 {
+  // By default, do not change T.
+  using type = T;
+};
+
+template <>
+struct Int8x4ToInt32 {
+  using type = int32;
+};
 
-template 
+// T is the element type of the conv_input, filter and side_input tensors.
+// BiasType is the element type of the bias tensor, which can be different.
+// ScaleType is the type used for conv_input_scale, side_input_scale.
+template 
 class FusedConv2DBiasActivationOp : public OpKernel {
  public:
   explicit FusedConv2DBiasActivationOp(OpKernelConstruction* context)
       : OpKernel(context) {
-    string data_format;
-    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
-    OP_REQUIRES(context, FormatFromString(data_format, &data_format_),
+    string data_format_str, filter_format_str;
+    OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str));
+    OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_),
                 errors::InvalidArgument("Invalid data format"));
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("filter_format", &filter_format_str));
     OP_REQUIRES(context,
-                (data_format_ == FORMAT_NHWC || data_format_ == FORMAT_NCHW),
-                errors::InvalidArgument("Current implementation only supports "
-                                        "NHWC and NCHW data formats."));
-    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
-    OP_REQUIRES(context, strides_.size() == 4,
+                FilterFormatFromString(filter_format_str, &filter_format_),
+                errors::InvalidArgument("Invalid filter format"));
+
+    std::vector strides;
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides));
+    OP_REQUIRES(context, strides.size() == 4,
                 errors::InvalidArgument("Sliding window strides field must "
                                         "specify 4 dimensions"));
+
+    stride_rows_ = GetTensorDim(strides, data_format_, 'H');
+    stride_cols_ = GetTensorDim(strides, data_format_, 'W');
     OP_REQUIRES(
         context,
-        (GetTensorDim(strides_, data_format_, 'N') == 1 &&
-         GetTensorDim(strides_, data_format_, 'C') == 1),
-        errors::InvalidArgument("Current implementation does not yet support "
-                                "strides in the batch and depth dimensions."));
-    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+        (GetTensorDim(strides, data_format_, 'N') == 1 &&
+         GetTensorDim(strides, data_format_, 'C') == 1),
+        errors::InvalidArgument("Convolutional strides are not supported in "
+                                "the batch or depth dimensions."));
+
+    // Assuming qint8 <--> NCHW_VECT_C, OIHW_VECT_I (int8x4) here.
+    constexpr bool is_int8x4 = std::is_same::value;
+
+    // Note: Only NCHW_VECT_C format is supported for int8.
+    // This is because it is expected to be the fastest, and our previous tests
+    // found cudnn 6 does not fully support the other formats for int8 mode.
+    OP_REQUIRES(context, (is_int8x4 == (data_format_ == FORMAT_NCHW_VECT_C)),
+                errors::InvalidArgument(
+                    "qint8 should be used with data_format NCHW_VECT_C."));
+
+    OP_REQUIRES(context, (is_int8x4 == (filter_format_ == FORMAT_OIHW_VECT_I)),
+                errors::InvalidArgument(
+                    "qint8 should be used with filter_format OIHW_VECT_I."));
+
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_type_));
+    eigen_padding_type_ = BrainPadding2EigenPadding(padding_type_);
     string activation_mode_str;
     OP_REQUIRES_OK(context,
                    context->GetAttr("activation_mode", &activation_mode_str));
@@ -79,130 +123,111 @@ class FusedConv2DBiasActivationOp : public OpKernel {
                                                         &activation_mode_));
     OP_REQUIRES(context, activation_mode_ == ActivationMode::RELU,
                 errors::InvalidArgument("Current implementation only supports "
-                                        "relu as the activation mode."));
+                                        "RELU as the activation function."));
     cudnn_use_autotune_ = CudnnUseAutotune();
+    float conv_input_scale_flt, side_input_scale_flt;
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("conv_input_scale", &conv_input_scale_flt));
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("side_input_scale", &side_input_scale_flt));
+    conv_input_scale_ = conv_input_scale_flt;
+    side_input_scale_ = side_input_scale_flt;
+  }
+
+  Status CheckShape(const Tensor& tensor, const string& tensor_name) {
+    const int num_dims = tensor.dims();
+    for (int i = 0; i < num_dims; i++) {
+      if (!FastBoundsCheck(tensor.dim_size(i),
+                           std::numeric_limits::max())) {
+        return errors::InvalidArgument(tensor_name, " dimension ", i,
+                                       " too large");
+      }
+    }
+    // If there is a 5th dimension it is the VECT_C or VECT_I dimension.
+    if (num_dims == 5 && tensor.dim_size(4) != 4) {
+      return errors::InvalidArgument("The last dimension of ", tensor_name,
+                                     " must be of size 4 for qint8.");
+    }
+    return Status::OK();
   }
 
   void Compute(OpKernelContext* context) override {
-    // Input tensor is one of the following shapes:
-    // [ batch, in_rows, in_cols, in_depth ] (for NHWC data format)
-    // [ batch, in_depth, in_rows, in_cols ] (for NCHW data format)
-    const Tensor& input = context->input(0);
+    // The conv_input tensor is one of the following formats:
+    // NHWC, NCHW, NCHW_VECT_C.
+    const Tensor& conv_input = context->input(0);
+    OP_REQUIRES_OK(context, CheckShape(conv_input, "conv_input"));
 
-    // Input filter is of the following dimensions:
-    // [ filter_rows, filter_cols, in_depth, out_depth ]
+    // The filter tensor is one of the following formats:
+    // HWIO, OIHW, OIHW_VECT_I.
     const Tensor& filter = context->input(1);
+    OP_REQUIRES_OK(context, CheckShape(filter, "filter"));
 
-    // Input bias is a 1-D tensor the size of the last
-    // dimension of Output tensor
+    // Input bias is a 1-D tensor, with size matching output depth.
     const Tensor& bias = context->input(2);
+    OP_REQUIRES_OK(context, CheckShape(bias, "conv_input"));
 
-    // For 2D convolution, there should be 4 dimensions.
-    OP_REQUIRES(context, input.dims() == 4,
-                errors::InvalidArgument("input must be 4-dimensional",
-                                        input.shape().DebugString()));
-    OP_REQUIRES(context, filter.dims() == 4,
-                errors::InvalidArgument("filter must be 4-dimensional: ",
-                                        filter.shape().DebugString()));
-
-    // Bias should be a 1-D tensor.
-    OP_REQUIRES(context, bias.dims() == 1,
-                errors::InvalidArgument("bias must be 1-dimensional: ",
-                                        bias.shape().DebugString()));
-
-    for (int i = 0; i < 4; i++) {
-      OP_REQUIRES(context,
-                  FastBoundsCheck(filter.dim_size(i),
-                                  std::numeric_limits::max()),
-                  errors::InvalidArgument("filter dimension too large"));
-      OP_REQUIRES(
-          context,
-          FastBoundsCheck(input.dim_size(i), std::numeric_limits::max()),
-          errors::InvalidArgument("input dimension too large"));
+    // If side_input_scale != 0, then side_input is not ignored and
+    // has the same type and dimensions as the output.
+    const Tensor& side_input = context->input(3);
+    if (side_input_scale_ != 0) {
+      OP_REQUIRES_OK(context, CheckShape(side_input, "side_input"));
     }
 
-    // The last dimension for input is in_depth. It must be the same as the
-    // filter's in_depth.
-    const int64 in_depth = GetTensorDim(input, data_format_, 'C');
-    OP_REQUIRES(context, in_depth == filter.dim_size(2),
-                errors::InvalidArgument(
-                    "input and filter must have the same depth: ", in_depth,
-                    " vs ", filter.dim_size(2)));
-
-    // The last dimension for filter is out_depth.
-    const int32 out_depth = static_cast(filter.dim_size(3));
-
-    // The second dimension for input is rows/height.
-    // The first dimension for filter is rows/height.
-    const int64 input_rows_raw = GetTensorDim(input, data_format_, 'H');
-    const int32 input_rows = static_cast(input_rows_raw);
-    const int32 filter_rows = static_cast(filter.dim_size(0));
-
-    // The third dimension for input is columns/width.
-    // The second dimension for filter is columns/width.
-    const int64 input_cols_raw = GetTensorDim(input, data_format_, 'W');
-    const int32 input_cols = static_cast(input_cols_raw);
-    const int32 filter_cols = static_cast(filter.dim_size(1));
-
-    // The first dimension for input is batch.
-    const int64 batch_raw = GetTensorDim(input, data_format_, 'N');
-    const int32 batch = static_cast(batch_raw);
-
-    // For now we take the stride from the second and third dimensions only (we
-    // do not support striding on the batch or depth dimension).
-    const int32 stride_rows =
-        static_cast(GetTensorDim(strides_, data_format_, 'H'));
-    const int32 stride_cols =
-        static_cast(GetTensorDim(strides_, data_format_, 'W'));
-    const int32 bias_size = static_cast(bias.dim_size(0));
-
-    int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
-    OP_REQUIRES_OK(context,
-                   GetWindowedOutputSize(input_rows, filter_rows, stride_rows,
-                                         padding_, &out_rows, &pad_rows));
-    OP_REQUIRES_OK(context,
-                   GetWindowedOutputSize(input_cols, filter_cols, stride_cols,
-                                         padding_, &out_cols, &pad_cols));
-    // Output tensor is of the following dimensions:
-    // [ in_batch, out_rows, out_cols, out_depth ]
-    TensorShape out_shape =
-        ShapeFromFormat(data_format_, batch, out_rows, out_cols, out_depth);
+    // TODO(pauldonnelly): Switch to a more efficient mechanism to access
+    // dimension indexes and per-dimension attributes.
+    const int32 filter_rows = GetFilterDim(filter, filter_format_, 'H');
+    const int32 filter_cols = GetFilterDim(filter, filter_format_, 'W');
+    const int32 output_depth = GetFilterDim(filter, filter_format_, 'O');
+
+    const int32 batch_size = GetTensorDim(conv_input, data_format_, 'N');
+    const int32 conv_input_rows = GetTensorDim(conv_input, data_format_, 'H');
+    const int32 conv_input_cols = GetTensorDim(conv_input, data_format_, 'W');
+
+    int64 output_rows = 0, output_cols = 0, pad_rows = 0, pad_cols = 0;
+    OP_REQUIRES_OK(context, GetWindowedOutputSize(conv_input_rows, filter_rows,
+                                                  stride_rows_, padding_type_,
+                                                  &output_rows, &pad_rows));
+    OP_REQUIRES_OK(context, GetWindowedOutputSize(conv_input_cols, filter_cols,
+                                                  stride_cols_, padding_type_,
+                                                  &output_cols, &pad_cols));
+    // Initialize the output tensor shape according to data_format_
+    TensorShape output_shape = ShapeFromFormat(
+        data_format_, batch_size, output_rows, output_cols, output_depth);
     Tensor* output = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
-
-    // Bias size should be the same as the size of the channel dimension of
-    // output.
-    OP_REQUIRES(context, bias_size == out_depth,
-                errors::InvalidArgument(
-                    "bias size should equal the channel "
-                    "dimension size of output. bias shape: ",
-                    bias.shape().DebugString() +
-                        ", output shape: " + output->shape().DebugString()));
+    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
 
-    VLOG(2) << "FusedConv2DBiasActivation: in_depth = " << in_depth
-            << ", input_cols = " << input_cols
+    VLOG(2) << "FusedConv2DBiasActivation: conv_input_cols = "
+            << conv_input_cols << ", conv_input_rows = " << conv_input_rows
             << ", filter_cols = " << filter_cols
-            << ", input_rows = " << input_rows
             << ", filter_rows = " << filter_rows
-            << ", stride_rows = " << stride_rows
-            << ", stride_cols = " << stride_cols
-            << ", bias_size = " << bias_size << ", out_depth = " << out_depth;
+            << ", stride_cols = " << stride_cols_
+            << ", stride_rows = " << stride_rows_
+            << ", output_depth = " << output_depth
+            << ", output_cols = " << output_cols
+            << ", output_rows = " << output_rows
+            << ", output_shape.num_elements = " << output_shape.num_elements();
 
     // If there is nothing to compute, return.
-    if (out_shape.num_elements() == 0) {
+    if (output_shape.num_elements() == 0) {
       return;
     }
-    launcher_.launch(context, cudnn_use_autotune_, input, filter, stride_rows,
-                     stride_cols, bias, activation_mode_,
-                     BrainPadding2EigenPadding(padding_), data_format_, output);
+
+    launcher_.launch(context, cudnn_use_autotune_, conv_input,
+                     conv_input_scale_, filter, stride_rows_, stride_cols_,
+                     eigen_padding_type_, side_input, side_input_scale_, bias,
+                     activation_mode_, data_format_, filter_format_, output);
   }
 
  private:
-  std::vector strides_;
-  Padding padding_;
+  int32 stride_rows_, stride_cols_;
+  Padding padding_type_;
+  Eigen::PaddingType eigen_padding_type_;
   ActivationMode activation_mode_;
   TensorFormat data_format_;
-  LaunchFusedConv2DBiasActivationOp launcher_;
+  FilterTensorFormat filter_format_;
+  ScaleType conv_input_scale_;
+  ScaleType side_input_scale_;
+  LaunchFusedConv2DBiasActivationOp launcher_;
   bool cudnn_use_autotune_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(FusedConv2DBiasActivationOp);
@@ -211,67 +236,72 @@ class FusedConv2DBiasActivationOp : public OpKernel {
 #if GOOGLE_CUDA
 namespace dnn = ::perftools::gputools::dnn;
 
-dnn::ActivationMode BrainActivationMode2CudnnActivationMode(
-    ActivationMode activation_mode) {
-  switch (activation_mode) {
-    case ActivationMode::SIGMOID:
-      return dnn::ActivationMode::kSigmoid;
-    case ActivationMode::RELU:
-      return dnn::ActivationMode::kRelu;
-    case ActivationMode::RELUX:
-      return dnn::ActivationMode::kReluX;
-    case ActivationMode::RELU6:
-      return dnn::ActivationMode::kRelu6;
-    case ActivationMode::TANH:
-      return dnn::ActivationMode::kTanh;
-    case ActivationMode::BANDPASS:
-      return dnn::ActivationMode::kBandPass;
-  }
-  // Prevent compiler warning about missing return
-  return dnn::ActivationMode::kRelu;
-}
-
 // A dummy type to group forward convolution autotune results together.
 struct ConvBiasActivationAutoTuneGroup {
   static string name() { return "ConvBiasActivation"; }
 };
-typedef AutoTuneSingleton
+typedef AutoTuneSingleton
     AutoTuneConvBiasActivation;
 
-template 
-void LaunchFusedConv2DBiasActivationOp::launch(
-    OpKernelContext* ctx, bool cudnn_use_autotune, const Tensor& input_param,
-    const Tensor& filter, int32 row_stride, int32 col_stride,
-    const Tensor& bias, const ActivationMode& activation_mode,
-    const Eigen::PaddingType& padding, TensorFormat data_format,
-    Tensor* output) {
-  using perftools::gputools::dnn::AlgorithmConfig;
-  using perftools::gputools::dnn::AlgorithmType;
-  using perftools::gputools::dnn::ProfileResult;
-  using perftools::gputools::dnn::kDefaultAlgorithm;
+// Allocates 'transformed_tensor' and transforms 'nhwc_tensor' into it
+// using the specified 'batch_size', 'rows', 'cols', and 'depth' dimensions.
+template 
+Status TransformNHWCToNCHW(OpKernelContext* ctx, const Tensor& nhwc_tensor,
+                           int batch_size, int rows, int cols, int depth,
+                           Tensor* transformed_tensor, const Tensor** result) {
+  TensorShape nchw_shape =
+      ShapeFromFormat(FORMAT_NCHW, batch_size, rows, cols, depth);
+  if (depth > 1) {
+    TF_RETURN_IF_ERROR(ctx->allocate_temp(DataTypeToEnum::value, nchw_shape,
+                                          transformed_tensor));
+    functor::NHWCToNCHW()(
+        ctx->eigen_device(), nhwc_tensor.tensor(),
+        transformed_tensor->tensor());
+  } else {
+    // If depth <= 1, then just reshape.
+    CHECK(transformed_tensor->CopyFrom(nhwc_tensor, nchw_shape));
+  }
+  *result = transformed_tensor;
+  return Status::OK();
+}
+
+template 
+void LaunchFusedConv2DBiasActivationOp::
+    launch(OpKernelContext* ctx, bool cudnn_use_autotune,
+           const Tensor& conv_input_param, ScaleType conv_input_scale,
+           const Tensor& filter_param, int32 row_stride, int32 col_stride,
+           const Eigen::PaddingType& padding, const Tensor& side_input_param,
+           ScaleType side_input_scale, const Tensor& bias,
+           ActivationMode activation_mode, TensorFormat data_format,
+           FilterTensorFormat filter_format, Tensor* output_param) {
   auto* stream = ctx->op_device_context()->stream();
   OP_REQUIRES(ctx, stream, errors::Internal("No GPU stream available."));
 
-  Tensor input = input_param;
-
-  perftools::gputools::dnn::ActivationMode cudnn_activation_mode =
-      BrainActivationMode2CudnnActivationMode(activation_mode);
-
   // TODO(yangzihao): refactor all the complicated/duplicated code in regular
   // conv ops to a shared conv utility.
-  int32 padding_rows = 0;
-  int32 padding_cols = 0;
-  const int64 in_batch = GetTensorDim(input, data_format, 'N');
-  int64 in_rows = GetTensorDim(input, data_format, 'H');
-  int64 in_cols = GetTensorDim(input, data_format, 'W');
-  const int64 in_depths = GetTensorDim(input, data_format, 'C');
-  const int64 out_batch = GetTensorDim(*output, data_format, 'N');
-  const int64 out_rows = GetTensorDim(*output, data_format, 'H');
-  const int64 out_cols = GetTensorDim(*output, data_format, 'W');
-  const int64 out_depths = GetTensorDim(*output, data_format, 'C');
-  const int64 patch_rows = filter.dim_size(0);
-  const int64 patch_cols = filter.dim_size(1);
+
+  // Assuming qint8 <--> NCHW_VECT_C, OIHW_VECT_I (int8x4) here.
+  constexpr bool is_int8x4 = std::is_same::value;
+  constexpr int rank = is_int8x4 ? 5 : 4;
+  constexpr int vect = is_int8x4 ? 4 : 1;
+
+  const int batch_size = GetTensorDim(conv_input_param, data_format, 'N');
+  int conv_input_rows = GetTensorDim(conv_input_param, data_format, 'H');
+  int conv_input_cols = GetTensorDim(conv_input_param, data_format, 'W');
+
+  const int conv_input_depth =
+      GetTensorDim(conv_input_param, data_format, 'C') * vect;
+  const int output_rows = GetTensorDim(*output_param, data_format, 'H');
+  const int output_cols = GetTensorDim(*output_param, data_format, 'W');
+  const int output_depth = GetFilterDim(filter_param, filter_format, 'O');
+  const int filter_rows = GetFilterDim(filter_param, filter_format, 'H');
+  const int filter_cols = GetFilterDim(filter_param, filter_format, 'W');
+  int padding_rows = 0;
+  int padding_cols = 0;
+  const Tensor* conv_input = &conv_input_param;
+
+  Tensor maybe_padded_conv_input;
   if (padding == Eigen::PADDING_SAME) {
     // Total padding on rows and cols is
     // Pr = (R' - 1) * S + Kr - R
@@ -281,114 +311,152 @@ void LaunchFusedConv2DBiasActivationOp::launch(
     // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top
     // and Pc - Pc/2 on the bottom.  When Pr or Pc is odd, this means
     // we pad more on the right and bottom than on the top and left.
-    padding_rows =
-        std::max(0, (out_rows - 1) * row_stride + patch_rows - in_rows);
-    padding_cols =
-        std::max(0, (out_cols - 1) * col_stride + patch_cols - in_cols);
-    const int rows_parity = padding_rows & 1;
-    const int cols_parity = padding_cols & 1;
-    if ((rows_parity | cols_parity) != 0) {
+    padding_rows = std::max(
+        0, (output_rows - 1) * row_stride + filter_rows - conv_input_rows);
+    padding_cols = std::max(
+        0, (output_cols - 1) * col_stride + filter_cols - conv_input_cols);
+    const int padding_rows_parity = padding_rows & 1;
+    const int padding_cols_parity = padding_cols & 1;
+    if ((padding_rows_parity | padding_cols_parity) != 0) {
       Tensor transformed_input;
-      int64 new_in_rows = in_rows + rows_parity;
-      int64 new_in_cols = in_cols + cols_parity;
+      const int new_conv_input_rows = conv_input_rows + padding_rows_parity;
+      const int new_conv_input_cols = conv_input_cols + padding_cols_parity;
+
+      using VectT = typename Int8x4ToInt32::type>::type;
+      auto pad_data_format = is_int8x4 ? FORMAT_NCHW : data_format;
+
       OP_REQUIRES_OK(
-          ctx,
-          ctx->allocate_temp(DataTypeToEnum::value,
-                             ShapeFromFormat(data_format, in_batch, new_in_rows,
-                                             new_in_cols, in_depths),
-                             &transformed_input));
-
-      functor::PadInput()(
-          ctx->eigen_device(), To32Bit(input_param.tensor()),
-          {{0, 0}}, {{rows_parity, cols_parity}},
-          To32Bit(transformed_input.tensor()), data_format);
-
-      input = transformed_input;
-      in_rows = new_in_rows;
-      in_cols = new_in_cols;
+          ctx, ctx->allocate_temp(
+                   DataTypeToEnum::value,
+                   ShapeFromFormat(data_format, batch_size, new_conv_input_rows,
+                                   new_conv_input_cols, conv_input_depth),
+                   &maybe_padded_conv_input));
+
+      auto conv_input_eigen_tensor =
+          To32Bit(conv_input_param.reinterpret_last_dimension());
+      auto padded_conv_input_eigen_tensor = To32Bit(
+          maybe_padded_conv_input.reinterpret_last_dimension());
+
+      functor::PadInput()(
+          ctx->eigen_device(), conv_input_eigen_tensor, {{0, 0}},
+          {{padding_rows_parity, padding_cols_parity}},
+          padded_conv_input_eigen_tensor, pad_data_format);
+
+      conv_input = &maybe_padded_conv_input;
+      conv_input_rows = new_conv_input_rows;
+      conv_input_cols = new_conv_input_cols;
     }
   }
 
-  if (data_format == FORMAT_NHWC) {
-    // Convert the input tensor from NHWC to NCHW.
-    TensorShape nchw_shape =
-        ShapeFromFormat(FORMAT_NCHW, in_batch, in_rows, in_cols, in_depths);
-    if (in_depths > 1) {
-      Tensor transformed_input;
-      OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum::value,
-                                             nchw_shape, &transformed_input));
-      functor::NHWCToNCHW()(
-          ctx->eigen_device(),
-          const_cast(input).tensor(),
-          transformed_input.tensor());
-      input = transformed_input;
-    } else {
-      // If depth <= 1, then just reshape.
-      CHECK(input.CopyFrom(input, nchw_shape));
+  Tensor maybe_transformed_conv_input, maybe_transformed_side_input;
+  Tensor maybe_transformed_output;
+  const Tensor* side_input = &side_input_param;
+  Tensor* output = output_param;
+
+  // NOTE: Here and elsewhere, checking 'is_int8x4' may look unnecessary
+  // and inefficient, but it is actually both a time and code size optimization,
+  // since 'is_int8x4' is a constexpr determined by the template parameter.
+  if (!is_int8x4 && data_format == FORMAT_NHWC) {
+    OP_REQUIRES_OK(ctx, (TransformNHWCToNCHW(
+                            ctx, *conv_input, batch_size, conv_input_rows,
+                            conv_input_cols, conv_input_depth,
+                            &maybe_transformed_conv_input, &conv_input)));
+    if (side_input_scale != 0) {
+      OP_REQUIRES_OK(
+          ctx, (TransformNHWCToNCHW(
+                   ctx, side_input_param, batch_size, output_rows, output_cols,
+                   output_depth, &maybe_transformed_side_input, &side_input)));
+    }
+    if (output_depth > 1) {
+      // Allocate a tensor for the NCHW output of the kernel and point output
+      // to it. Afterwards, we will transform it to NHWC while copying back to
+      // 'output_param'.
+      TensorShape nchw_shape = ShapeFromFormat(
+          FORMAT_NCHW, batch_size, output_rows, output_cols, output_depth);
+      OP_REQUIRES_OK(ctx,
+                     ctx->allocate_temp(DataTypeToEnum::value, nchw_shape,
+                                        &maybe_transformed_output));
+      output = &maybe_transformed_output;
     }
   }
 
-  CHECK(padding_rows >= 0 && padding_cols >= 0)
-      << "Negative row or col paddings: (" << padding_rows << ", "
-      << padding_cols << ")";
-  perftools::gputools::dnn::BatchDescriptor input_desc;
-  input_desc.set_count(in_batch)
-      .set_feature_map_count(in_depths)
-      .set_height(in_rows)
-      .set_width(in_cols)
-      .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX);
-  perftools::gputools::dnn::BatchDescriptor output_desc;
-  output_desc.set_count(out_batch)
-      .set_height(out_rows)
-      .set_width(out_cols)
-      .set_feature_map_count(out_depths)
-      .set_layout(perftools::gputools::dnn::DataLayout::kBatchDepthYX);
-  perftools::gputools::dnn::FilterDescriptor filter_desc;
-  filter_desc.set_input_filter_height(filter.dim_size(0))
-      .set_input_filter_width(filter.dim_size(1))
-      .set_input_feature_map_count(filter.dim_size(2))
-      .set_output_feature_map_count(filter.dim_size(3));
-  perftools::gputools::dnn::ConvolutionDescriptor conv_desc;
+  constexpr auto data_layout = is_int8x4 ? dnn::DataLayout::kBatchDepthYX4
+                                         : dnn::DataLayout::kBatchDepthYX;
+  constexpr auto filter_layout = is_int8x4 ? dnn::FilterLayout::kOutputInputYX4
+                                           : dnn::FilterLayout::kOutputInputYX;
+
+  dnn::BatchDescriptor conv_input_desc;
+  conv_input_desc.set_count(batch_size)
+      .set_feature_map_count(conv_input_depth)
+      .set_height(conv_input_rows)
+      .set_width(conv_input_cols)
+      .set_layout(data_layout);
+  dnn::FilterDescriptor filter_desc;
+  filter_desc.set_input_filter_height(filter_rows)
+      .set_input_filter_width(filter_cols)
+      .set_input_feature_map_count(conv_input_depth)
+      .set_output_feature_map_count(output_depth)
+      .set_layout(filter_layout);
+  dnn::BatchDescriptor side_input_desc;
+  side_input_desc.set_count(batch_size)
+      .set_height(output_rows)
+      .set_width(output_cols)
+      .set_feature_map_count(output_depth)
+      .set_layout(data_layout);
+  dnn::BatchDescriptor bias_desc;
+  bias_desc.set_count(1)
+      .set_height(1)
+      .set_width(1)
+      .set_feature_map_count(output_depth)
+      .set_layout(dnn::DataLayout::kBatchDepthYX);
+  dnn::BatchDescriptor output_desc;
+  output_desc.set_count(batch_size)
+      .set_height(output_rows)
+      .set_width(output_cols)
+      .set_feature_map_count(output_depth)
+      .set_layout(data_layout);
+  dnn::ConvolutionDescriptor conv_desc;
   conv_desc.set_vertical_filter_stride(row_stride)
       .set_horizontal_filter_stride(col_stride)
       .set_zero_padding_height(padding_rows / 2)
       .set_zero_padding_width(padding_cols / 2);
 
-  // Shuffles a filter tensor from:
-  //   [, in, out]
-  // to:
-  //   [out, in, ]
-  // TODO(yangzihao): Support a data layout tag for the filter weights, and only
-  // do the transform if the weights are not already in the correct layout.
-  Tensor transformed_filter;
-  OP_REQUIRES_OK(ctx, ctx->allocate_temp(
-                          DataTypeToEnum::value,
-                          TensorShape({filter.dim_size(3), filter.dim_size(2),
-                                       filter.dim_size(0), filter.dim_size(1)}),
-                          &transformed_filter));
-
-  functor::TransformFilter()(
-      ctx->eigen_device(), To32Bit(filter.tensor()),
-      To32Bit(transformed_filter.tensor()));
-
-  Tensor transformed_output;
-  OP_REQUIRES_OK(
-      ctx, ctx->allocate_temp(DataTypeToEnum::value,
-                              ShapeFromFormat(FORMAT_NCHW, out_batch, out_rows,
-                                              out_cols, out_depths),
-                              &transformed_output));
-
-  auto input_ptr = AsDeviceMemory(input.template flat().data(),
-                                  input.template flat().size());
+  Tensor maybe_transformed_filter;
+  const Tensor* filter;
+  if (is_int8x4) {
+    // We have already checked filter is OIHW_VECT_I in the constructor.
+    filter = &filter_param;
+  } else if (filter_format == FORMAT_HWIO) {
+    // Shuffle filter tensor from HWIO to OIHW:
+    OP_REQUIRES_OK(ctx, ctx->allocate_temp(
+                            DataTypeToEnum::value,
+                            ShapeFromFilterFormat(
+                                FORMAT_OIHW, filter_param.shape(), FORMAT_HWIO),
+                            &maybe_transformed_filter));
+    functor::TransformFilter()(
+        ctx->eigen_device(), To32Bit(filter_param.tensor()),
+        To32Bit(maybe_transformed_filter.tensor()));
+    filter = &maybe_transformed_filter;
+  }
+
+  auto conv_input_ptr =
+      AsDeviceMemory(reinterpret_cast::type*>(
+                         conv_input->template flat().data()),
+                     conv_input->template flat().size());
   auto filter_ptr =
-      AsDeviceMemory(transformed_filter.template flat().data(),
-                     transformed_filter.template flat().size());
+      AsDeviceMemory(reinterpret_cast::type*>(
+                         filter->template flat().data()),
+                     filter->template flat().size());
+  auto side_input_ptr =
+      AsDeviceMemory(reinterpret_cast::type*>(
+                         side_input->template flat().data()),
+                     side_input->template flat().size());
   auto output_ptr =
-      AsDeviceMemory(transformed_output.template flat().data(),
-                     transformed_output.template flat().size());
-
-  auto bias_ptr = AsDeviceMemory(bias.template flat().data(),
-                                 bias.template flat().size());
+      AsDeviceMemory(reinterpret_cast::type*>(
+                         output->template flat().data()),
+                     output->template flat().size());
+  auto bias_ptr = AsDeviceMemory(bias.template flat().data(),
+                                 bias.template flat().size());
 
   static int64 ConvolveScratchSize = GetCudnnWorkspaceLimit(
       // default value is in bytes despite the name of the environment variable
@@ -396,38 +464,42 @@ void LaunchFusedConv2DBiasActivationOp::launch(
   );
 
   int device_id = stream->parent()->device_ordinal();
-  DataType dtype = input.dtype();
-  ConvParameters conv_parameters = {
-      in_batch,
-      in_depths,
-      {{in_rows, in_cols}},
-      out_depths,
-      {{patch_rows, patch_cols}},
+  FusedConvParameters fused_conv_parameters = {
+      batch_size,
+      conv_input_depth,
+      {{conv_input_rows, conv_input_cols}},
+      output_depth,
+      {{filter_rows, filter_cols}},
       {{row_stride, col_stride}},
       {{padding_rows, padding_cols}},
-      dtype,
+      conv_input->dtype(),
       device_id,
+      (side_input_scale != 0),
+      activation_mode,
   };
 
-  AlgorithmConfig algorithm_config;
+  dnn::AlgorithmConfig algorithm_config;
   if (cudnn_use_autotune && !AutoTuneConvBiasActivation::GetInstance()->Find(
-                                conv_parameters, &algorithm_config)) {
-    std::vector algorithms;
+                                fused_conv_parameters, &algorithm_config)) {
+    std::vector algorithms;
     CHECK(stream->parent()->GetConvolveAlgorithms(
-        conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms));
-    ProfileResult best_result;
-    ProfileResult best_result_no_scratch;
+        fused_conv_parameters.ShouldIncludeWinogradNonfusedAlgo(),
+        &algorithms));
+    dnn::ProfileResult best_result;
+    dnn::ProfileResult best_result_no_scratch;
     for (auto profile_algorithm : algorithms) {
       // TODO(zhengxq): profile each algorithm multiple times to better
       // accuracy.
       CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
-      ProfileResult profile_result;
+      dnn::ProfileResult profile_result;
       bool cudnn_launch_status =
           stream
-              ->ThenConvolveWithAlgorithm(
-                  input_desc, input_ptr, filter_desc, filter_ptr, conv_desc,
-                  bias_ptr, cudnn_activation_mode, output_desc, &output_ptr,
-                  &scratch_allocator, AlgorithmConfig(profile_algorithm),
+              ->ThenFusedConvolveWithAlgorithm(
+                  conv_input_desc, conv_input_ptr, conv_input_scale,
+                  filter_desc, filter_ptr, conv_desc, side_input_ptr,
+                  side_input_scale, bias_desc, bias_ptr,
+                  dnn::ActivationMode::kRelu, output_desc, &output_ptr,
+                  &scratch_allocator, dnn::AlgorithmConfig(profile_algorithm),
                   &profile_result)
               .ok();
       if (cudnn_launch_status) {
@@ -454,42 +526,68 @@ void LaunchFusedConv2DBiasActivationOp::launch(
       algorithm_config.set_algorithm_no_scratch(
           best_result_no_scratch.algorithm());
     }
-    AutoTuneConvBiasActivation::GetInstance()->Insert(conv_parameters,
+    AutoTuneConvBiasActivation::GetInstance()->Insert(fused_conv_parameters,
                                                       algorithm_config);
   }
 
   CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
   bool cudnn_launch_status =
       stream
-          ->ThenConvolveWithAlgorithm(
-              input_desc, input_ptr, filter_desc, filter_ptr, conv_desc,
-              bias_ptr, cudnn_activation_mode, output_desc, &output_ptr,
-              &scratch_allocator, algorithm_config,
+          ->ThenFusedConvolveWithAlgorithm(
+              conv_input_desc, conv_input_ptr, conv_input_scale, filter_desc,
+              filter_ptr, conv_desc, side_input_ptr, side_input_scale,
+              bias_desc, bias_ptr, dnn::ActivationMode::kRelu, output_desc,
+              &output_ptr, &scratch_allocator, algorithm_config,
               /*output_profile_result=*/nullptr)
           .ok();
 
   if (!cudnn_launch_status) {
-    ctx->SetStatus(errors::Internal(
-        "cuDNN launch failure : input shape(", input.shape().DebugString(),
-        ") filter shape(", filter.shape().DebugString(), ")"));
+    ctx->SetStatus(errors::Internal("cuDNN launch failure : conv_input shape(",
+                                    conv_input->shape().DebugString(),
+                                    ") filter shape(",
+                                    filter->shape().DebugString(), ")"));
   }
 
-  // Convert the output tensor back from NCHW to NHWC.
-  if (data_format == FORMAT_NHWC) {
+  // Convert the output tensor back from NCHW to NHWC if necessary.
+  if (!is_int8x4 && (data_format == FORMAT_NHWC) && (output_depth > 1)) {
     functor::NCHWToNHWC()(
         ctx->eigen_device(),
-        const_cast(transformed_output).tensor(),
-        output->tensor());
-  } else {
-    *output = transformed_output;
+        const_cast(output)->tensor(),
+        output_param->tensor());
   }
 }
 
+// Forward declarations of the functor specializations for GPU used above.
+namespace functor {
+#define DECLARE_GPU_SPEC(T)                                              \
+  template <>                                                            \
+  void PadInput::operator()(                       \
+      const GPUDevice& d, typename TTypes::ConstTensor in,    \
+      const std::array& padding_left,                            \
+      const std::array& padding_right,                           \
+      typename TTypes::Tensor out, TensorFormat data_format); \
+  extern template struct PadInput;
+
+DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(int32);
+#undef DECLARE_GPU_SPEC
+}  // namespace functor
+
 // Registration of the GPU implementations.
-REGISTER_KERNEL_BUILDER(Name("FusedConv2DBiasActivation")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint("T"),
-                        FusedConv2DBiasActivationOp);
+
+REGISTER_KERNEL_BUILDER(
+    Name("FusedConv2DBiasActivation")
+        .Device(DEVICE_GPU)
+        .TypeConstraint("T")
+        .TypeConstraint("Tbias"),
+    FusedConv2DBiasActivationOp);
+
+REGISTER_KERNEL_BUILDER(
+    Name("FusedConv2DBiasActivation")
+        .Device(DEVICE_GPU)
+        .TypeConstraint("T")
+        .TypeConstraint("Tbias"),
+    FusedConv2DBiasActivationOp);
 
 #endif  // GOOGLE_CUDA
 
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.h b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.h
index d71b26cf1d..7534f5797c 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.h
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.h
@@ -24,7 +24,7 @@ limitations under the License.
 
 #if GOOGLE_CUDA
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/kernels/conv_ops_gpu.h"
+#include "tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #endif  // GOOGLE_CUDA
 
@@ -33,27 +33,30 @@ namespace tensorflow {
 // Forward declaration.
 class OpKernelContext;
 
-template 
+template 
 class LaunchFusedConv2DBiasActivationOp {
  public:
   void launch(OpKernelContext* ctx, bool cudnn_use_autotune,
-              const Tensor& input, const Tensor& filter, int row_stride,
-              int col_stride, const Tensor& bias,
-              const ActivationMode& activation_mode,
-              const Eigen::PaddingType& padding, TensorFormat data_format,
-              Tensor* output);
+              const Tensor& conv_input, ScaleType conv_input_scale,
+              const Tensor& filter, int32 row_stride, int32 col_stride,
+              const Eigen::PaddingType& padding, const Tensor& side_input,
+              ScaleType side_input_scale, const Tensor& bias,
+              ActivationMode activation_mode, TensorFormat data_format,
+              FilterTensorFormat filter_format, Tensor* output);
 };
 
 #ifdef GOOGLE_CUDA
-template 
-class LaunchFusedConv2DBiasActivationOp {
+template 
+class LaunchFusedConv2DBiasActivationOp {
  public:
   void launch(OpKernelContext* ctx, bool cudnn_use_autotune,
-              const Tensor& input, const Tensor& filter, int32 row_stride,
-              int32 col_stride, const Tensor& bias,
-              const ActivationMode& activation_mode,
-              const Eigen::PaddingType& padding, TensorFormat data_format,
-              Tensor* output);
+              const Tensor& conv_input, ScaleType conv_input_scale,
+              const Tensor& filter, int32 row_stride, int32 col_stride,
+              const Eigen::PaddingType& padding, const Tensor& side_input,
+              ScaleType side_input_scale, const Tensor& bias,
+              ActivationMode activation_mode, TensorFormat data_format,
+              FilterTensorFormat filter_format, Tensor* output);
 };
 #endif  // GOOGLE_CUDA
 
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
new file mode 100644
index 0000000000..dc43af1158
--- /dev/null
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv_ops_gpu.h
@@ -0,0 +1,74 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_FUSED_CONV_KERNELS_FUSED_CONV_OPS_GPU_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_FUSED_CONV_KERNELS_FUSED_CONV_OPS_GPU_H_
+
+#if GOOGLE_CUDA
+
+#include "tensorflow/core/kernels/conv_ops_gpu.h"
+#include "tensorflow/core/util/activation_mode.h"
+
+// TODO(pauldonnelly): Merge this file into core/kernels/conv_ops_gpu.h.
+
+namespace tensorflow {
+
+// Add additional parameters specific to fused convolutions.
+class FusedConvParameters : public ConvParameters {
+ public:
+  FusedConvParameters(int64 batch, int64 in_depths, const SpatialArray& in,
+                      int64 out_depths, const SpatialArray& filter,
+                      const SpatialArray& stride, const SpatialArray& padding,
+                      DataType dtype, int device_id, bool has_side_input,
+                      ActivationMode activation_mode)
+      : ConvParameters(batch, in_depths, in, out_depths, filter, stride,
+                       padding, dtype, device_id),
+        activation_mode_(activation_mode),
+        has_side_input_(has_side_input) {
+    hash_code_ = Hash64Combine(hash_code_, has_side_input);
+    hash_code_ = Hash64Combine(hash_code_, activation_mode);
+  }
+
+  bool operator==(const FusedConvParameters& other) const {
+    return this->get_data_as_tuple() == other.get_data_as_tuple();
+  }
+
+  bool operator!=(const FusedConvParameters& other) const {
+    return !(*this == other);
+  }
+
+  string ToString() const {
+    return strings::StrCat(ConvParameters::ToString(), ", ", has_side_input_,
+                           ", ", activation_mode_, ", ");
+  }
+
+ private:
+  using ParameterDataType =
+      std::tuple;
+
+  ParameterDataType get_data_as_tuple() const {
+    return std::make_tuple(ConvParameters::get_data_as_tuple(), has_side_input_,
+                           activation_mode_);
+  }
+
+  ActivationMode activation_mode_;
+  bool has_side_input_;
+};
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
+
+#endif  // THIRD_PARTY_TENSORFLOW_CONTRIB_FUSED_CONV_KERNELS_FUSED_CONV_OPS_GPU_H_
diff --git a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
index 6134c5c699..48f058b4c5 100644
--- a/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/ops/fused_conv2d_bias_activation_op.cc
@@ -33,40 +33,73 @@ string GetAllActivationModeAttrString() { return "activation_mode: {'Relu'}"; }
 }  // namespace
 
 // --------------------------------------------------------------------------
+
+// TODO(pauldonnelly): Add support for double inputs and scales to this Op,
+// (currently Attr does not support double).
+
 REGISTER_OP("FusedConv2DBiasActivation")
-    .Input("input: T")
+    .Input("conv_input: T")
     .Input("filter: T")
-    .Input("bias: T")
+    .Input("bias: Tbias")
+    .Input("side_input: T")
     .Output("output: T")
-    .Attr("T: {float}")
+    .Attr("T: {float, half, qint8}")
+    .Attr("Tbias: {float, half}")
+    .Attr("conv_input_scale: float = 1.0")
+    .Attr("side_input_scale: float = 0.0")
     .Attr("strides: list(int)")
     .Attr(GetPaddingAttrString())
-    .Attr(GetConvnetDataFormatAttrString())
-    .Attr(GetAllActivationModeAttrString())
+    .Attr("data_format: {'NHWC', 'NCHW', 'NCHW_VECT_C'} = 'NHWC'")
+    .Attr("filter_format: {'HWIO', 'OIHW', 'OIHW_VECT_I'} = 'HWIO'")
+    .Attr("activation_mode: {'Relu'} = 'Relu'")
     .SetShapeFn(shape_inference::FusedConvBiasActivationShape)
     .Doc(R"doc(
-    Computes a fused 2-D convolution, adds bias, and applies an activation function
-    on the output given 4-D `input`, 4-D `filter`, 1-D `bias` tensors and an activation mode.
+    Computes a fused kernel which implements: 2-D convolution, adds side input,
+    with separate scaling on convolution and side inputs, then adds bias and
+    applies the RELU activation function to the result. Supports both float and
+    qint8 data formats. In the case of qint8, the output is clipped to [0..127].
 
-    input: A 4-D tensor. The dimension order is interpreted according to the value
-        of `data_format`, see below for details.
-    filter: A 4-D tensor of shape
-        `[filter_height, filter_width, in_channels, out_channels]`
-    bias: 1-D with size of the `out_channels` dimension in filter.
-    output: A 4-D tensor. The dimension order is determined by the value of
-        `data_format`, see below for details.
-    T: The data type for the elements of input, filter, bias, and output Tensors.
+    conv_input: A tensor with format as specified by `data_format` (see below).
+    filter: A tensor with format depending on `data_format` as follows:
+        "NHWC", "NCHW":
+             `float [ filter_height, filter_width, in_channels, out_channels ]`
+        "NCHW_VECT_C":
+             `qint8 [ out_channels, in_channels, filter_height, filter_width ]`
+    bias: 1-D float tensor with size matching the `out_channels` dimension of
+        `filter`.
+        Note: this tensor is still float, even if other inputs are qint8.
+    side_input: A tensor with format as specified by `data_format` (see below).
+        This tensor will be ignored and can be [] if side_input_scale == 0.
+        Otherwise, the size of each dimension must match the `output` tensor.
+    output: A tensor with format as specified by `data_format` (see below).
+        The dimension sizes are determined automatically based on other inputs
+        and attributes.
+    T: The element data type of `conv_input`, `side_input` and `output` tensors.
+        Note: must match with the `data_format`.
+    Tbias: The element data type of `bias`.
+    conv_input_scale: scalar float value to be multiplied by `conv_input`.
+        (conceptually.. in reality it is applied after convolution).
+    side_input_scale: scalar float value to be multiplied by `side_input`.
     strides: 1-D tensor of length 4.  The stride of the sliding window for each
         dimension of `input`. The dimension order is determined by the value of
         `data_format`, see below for details.
+        Note: the stride for batch and channel dimensions must be 1.
     padding: The type of padding algorithm to use.
-    data_format: Specify the data format of the input and output data. With the
-        default format "NHWC", the data is stored in the order of:
-        [batch, height, width, channels].
-        Alternatively, the format could be "NCHW", the data storage order of:
-        [batch, channels, height, width].
-    activation_mode: Specify the activation function to apply to the output tensor
-        of bias add. Currently only supports "Relu".
+    data_format: A string specifying the data format of `conv_input`,
+        `side_input` and `output` tensors with the following options:
+        "NHWC": `float [ batch, height, width, channels ]`
+        "NCHW": `float [ batch, channels, height, width ]`
+        "NCHW_VECT_C":
+            `qint8 [ batch, channels / 4, height, width, channels % 4 ]`
+        Note: for "NCHW_VECT_C", `channels` must be a multiple of 4.
+    filter_format: A string specifying the data format of `filter`,
+        "HWIO": `float [ kernel_height, kernel_width, input_channels,
+                         output_channels ]`
+        "OIHW_VECT_I":
+            `qint8 [ output_channels, input_channels / 4,
+                     kernel_height, kernel_width, input_channels % 4 ]`
+    activation_mode: The activation applied to the output.
+        Currently must be "Relu".
 )doc");
 
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py
index 41f986dd07..8f3f31bad0 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py
@@ -26,62 +26,83 @@ _fused_conv2d_bias_activation_op_so = loader.load_op_library(
     resource_loader.get_path_to_datafile("_fused_conv2d_bias_activation_op.so"))
 
 
-def fused_conv2d_bias_activation(input_tensor,
-                                 filter_tensor,
+# pylint: disable=redefined-builtin
+def fused_conv2d_bias_activation(conv_input,
+                                 filter,
                                  bias,
-                                 strides,
-                                 padding,
-                                 activation_mode,
+                                 strides=None,
+                                 padding=None,
+                                 conv_input_scale=1.0,
+                                 side_input_scale=0.0,
+                                 side_input=None,
+                                 activation_mode="Relu",
                                  data_format=None,
+                                 filter_format=None,
                                  name=None):
-  """Computes a fused 2-D convolution, adds bias, and applies relu.
+  """Fused 2D conv, bias and activation with optional side input.
 
-      input_tensor: A 4-D tensor. The dimension order is interpreted
-      according to the value of `data_format`, see below for details.
-      filter_tensor: A 4-D tensor of shape
-          `[filter_height, filter_width, in_channels, out_channels]`
-      bias: 1-D with size of the `out_channels` dimension in filter.
-      output: A 4-D tensor. The dimension order is determined by the value of
-          `data_format`, see below for details.
-      T: The data type for the elements of input, filter, bias, and output
-      Tensors.
-      strides: 1-D tensor of length 4.  The stride of the sliding window for
-      each
-          dimension of `input`. The dimension order is determined by the value
-          of
-          `data_format`, see below for details.
-      padding: The type of padding algorithm to use.
-      data_format: Specify the data format of the input and output data. With
-      the
-          default format "NHWC", the data is stored in the order of:
-          [batch, height, width, channels].
-          Alternatively, the format could be "NCHW", the data storage order of:
-          [batch, channels, height, width].
-      activation_mode: Specify the activation function to apply to the output
-      tensor
-          of bias add. Currently only supports "Relu".
+  Computes a fused 2-D convolution scaled by conv_input_scale,
+  adds an optional side input scaled by side_input_scale, adds biases,
+  and applies ReLU. As an equation:
+  output = ReLU(conv_input_scale * Conv(conv_input, filter) +
+                side_input_scale * side_input + bias)
+  Note: In int8 mode, The ReLU will clip the output to the range [0..127].
 
   Args:
-    input_tensor: A `Tensor`. Must be one of the following types: `float32`.
-    filter_tensor: A `Tensor`. Must have the same type as `input`.
-    bias: A `Tensor`. Must have the same type as `input`.
-    strides: A list of `ints`.
+    conv_input: A `Tensor` of the format specified by `data_format`.
+    filter: A `Tensor` whose format depends on `data_format`:
+        if `data_format` is "NCHW_VECT_C", filter should be "OIHW_VECT_I"
+        otherwise, it should be "HWIO" format.
+    bias: A 1-D `Tensor` of type `float32`, and dimensions equal to the
+        number of output channels.
+    strides: A list of 4 `ints` specifying convolution strides.
+        if `data_format` is "NCHW" or "NCHW_VECT_C", the order should be NCHW.
+        if `data_format` is "NHWC", the order should be NHWC.
     padding: A `string` from: `"SAME", "VALID"`.
-    activation_mode: A `string` from: `"Sigmoid", "Relu", "Relu6", "ReluX",
-      "Tanh", "BandPass"`.
-    data_format: An optional `string` from: `"NHWC", "NCHW"`. Defaults to
-      `"NHWC"`.
+    conv_input_scale: A scalar `float32` that will be multiplied by conv_input.
+        This is optional and defaults to 1. However it should be set to
+        specify the quantization scale when `data_format` is "NCHW_VECT_C".
+    side_input_scale: A scalar `float32` that will be multiplied by side_input.
+        This is optional and defaults to 0.
+    side_input: A `Tensor` of the format specified by `data_format`.
+        This is useful for imlementing ResNet blocks.
+    activation_mode: (optional) currently must be the default "Relu".
+        Note that in qint8 mode, it also clips to 127, so acts like ReluX.
+    data_format: Specifies the data format.
+        Possible values are:
+        "NHWC" float [batch, height, width, channels]
+        "NCHW" float [batch, channels, height, width]
+        "NCHW_VECT_C" qint8 [batch, channels / 4, height, width, channels % 4]
+        Defaults to `"NHWC"`.
+        Performance is worst for `"NHWC"` and best for `"NCHW_VECT_C"`.
+    filter_format: Specifies the filter format.
+        Possible values are:
+        "HWIO" float [kernel_height, kernel_width, input_channels,
+                      output_channels ]
+        "OIHW" float [output_channels, input_channels, kernel_height,
+                      kernel_width ]
+        "OIHW_VECT_I" qint8 [ output_channels, input_channels / 4,
+                              kernel_height, kernel_width, input_channels % 4 ]
+        Defaults to `"HWIO"`.
     name: A name for the operation (optional).
 
   Returns:
-    A `Tensor`. Has the same type as `input`.
+    A `Tensor` of the format specified by `data_format`.
   """
+  if strides is None:
+    strides = [1, 1, 1, 1]
+  if side_input is None:
+    side_input = []
   return gen_fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
-      input=input_tensor,
-      filter=filter_tensor,
-      bias=bias,
-      strides=strides,
+      conv_input,
+      filter,
+      bias,
       padding=padding,
+      strides=strides,
+      conv_input_scale=conv_input_scale,
+      side_input_scale=side_input_scale,
+      side_input=side_input,
       activation_mode=activation_mode,
       data_format=data_format,
+      filter_format=filter_format,
       name=name)
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
index 5d6a2fa3b8..3b8f7d6ed7 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py
@@ -19,13 +19,16 @@ from __future__ import division
 from __future__ import print_function
 
 import numpy as np
+
 from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 
@@ -484,7 +487,8 @@ class FusedConv2DBiasActivationTest(test.TestCase):
     with self.test_session() as sess:
       # Illegal strides.
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
-                                   "strides in the batch and depth"):
+                                   "Convolutional strides are not supported in "
+                                   "the batch or depth dimensions."):
         sess.run(
             fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                 array_ops.placeholder(dtypes.float32),
@@ -494,7 +498,8 @@ class FusedConv2DBiasActivationTest(test.TestCase):
                 padding="SAME",
                 activation_mode="Relu"))
       with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
-                                   "strides in the batch and depth"):
+                                   "Convolutional strides are not supported in "
+                                   "the batch or depth dimensions."):
         sess.run(
             fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                 array_ops.placeholder(dtypes.float32),
@@ -552,6 +557,286 @@ def GetInceptionFwdTest(input_size, filter_size, stride, padding,
   return Test
 
 
+def CalculateCovolvedOutputDim(input_dim, filter_dim, stride, padding_type):
+  """Calculates the size of an output dimension of a strided convolution.
+
+  Given the sizes of the corresponding dimension of the input and filter shapes,
+  and the stride and padding_types, calculates the size of the output dimension.
+  This function can be called separately for each input dimension.
+
+  Args:
+    input_dim: An `int` specifying the size of the input dimension.
+    filter_dim: An `int` specifying the size of the filter dimension.
+    stride: An `int` specifying the step size of the convolution along the
+      input dimension.
+    padding_type: either 'VALID' or 'SAME'.
+
+  Returns:
+    The size of the output dimension.
+  """
+  if padding_type == "VALID":
+    return (input_dim - filter_dim + stride) // stride
+  else:  # padding_type == 'SAME'
+    return (input_dim + stride - 1) // stride
+
+
+def NchwVectCToNchw(in_tensor):
+  # [N, C / 4, H, W, 4] => [N, C / 4, 4, H, W] == [N, C, H, W]
+  t = array_ops.transpose(in_tensor, [0, 1, 4, 2, 3])
+  n = in_tensor.shape.dims[0].value
+  c = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value
+  h = in_tensor.shape.dims[2].value
+  w = in_tensor.shape.dims[3].value
+  return array_ops.reshape(t, [n, c, h, w])
+
+
+def OihwVectIToHwio(in_tensor):
+  # [O, I / 4, H, W, 4] => [O, I / 4, 4, H, W] == [O, I, H, W]
+  t = array_ops.transpose(in_tensor, [2, 3, 1, 4, 0])
+  o = in_tensor.shape.dims[0].value
+  i = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value
+  h = in_tensor.shape.dims[2].value
+  w = in_tensor.shape.dims[3].value
+  return array_ops.reshape(t, [h, w, i, o])
+
+
+def NchwToNchwVectC(in_tensor):
+  n, c, h, w = in_tensor.shape.as_list()
+  assert c % 4 == 0
+  t = array_ops.reshape(in_tensor, [n, c // 4, 4, h, w])
+  return array_ops.transpose(t, [0, 1, 3, 4, 2])
+
+
+def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
+                                          padding, strides, side_input_scale,
+                                          side_input, biases):
+  """Simulates the int8 fused 2-D convolution op using separate float ops.
+
+    The arguments and return values have the same format, meanings and
+    restrictions as the actual op.
+  Args:
+    conv_input_scale: A scalar 'float'.
+    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
+    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
+    padding: A `string` from: `"SAME", "VALID"`.
+    strides: A list of `ints`.
+    side_input_scale: A scalar 'float'.
+    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
+    biases: A `Tensor` of type `float32` in NCHW layout.
+  Returns:
+    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
+  """
+  conv_result = nn_ops.conv2d(
+      NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127)),
+      OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127)),
+      strides=strides,
+      padding=padding,
+      data_format="NCHW") * conv_input_scale
+
+  conv_and_side_inputs = conv_result + side_input_scale * NchwVectCToNchw(
+      gen_array_ops.dequantize(side_input, -128, 127))
+
+  logit = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW")
+
+  result, _, _ = gen_array_ops.quantize_v2(
+      NchwToNchwVectC(nn_ops.relu(logit)), -128, 127, dtypes.qint8)
+  return result
+
+
+class FusedConvInt8Tests(test.TestCase):
+  _test_params = [
+      {
+          "batch_size": 2,
+          "input_channels": 8,
+          "output_channels": 16,
+          "input_height": 8,
+          "input_width": 8,
+          "filter_height": 3,
+          "filter_width": 3,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "VALID"
+      },
+      {
+          "batch_size": 2,
+          "input_channels": 8,
+          "output_channels": 16,
+          "input_height": 8,
+          "input_width": 8,
+          "filter_height": 3,
+          "filter_width": 3,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.0,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 2,
+          "input_channels": 8,
+          "output_channels": 16,
+          "input_height": 8,
+          "input_width": 8,
+          "filter_height": 3,
+          "filter_width": 3,
+          "vertical_stride": 2,
+          "horizontal_stride": 2,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "VALID"
+      },
+      {
+          "batch_size": 2,
+          "input_channels": 16,
+          "output_channels": 16,
+          "input_height": 9,
+          "input_width": 9,
+          "filter_height": 3,
+          "filter_width": 3,
+          "vertical_stride": 1,
+          "horizontal_stride": 1,
+          "conv_input_scale": 0.001,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 3,
+          "input_channels": 8,
+          "output_channels": 8,
+          "input_height": 9,
+          "input_width": 9,
+          "filter_height": 5,
+          "filter_width": 5,
+          "vertical_stride": 1,
+          "horizontal_stride": 1,
+          "conv_input_scale": 0.001,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 3,
+          "input_channels": 8,
+          "output_channels": 8,
+          "input_height": 9,
+          "input_width": 9,
+          "filter_height": 7,
+          "filter_width": 1,
+          "vertical_stride": 2,
+          "horizontal_stride": 1,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+      {
+          "batch_size": 3,
+          "input_channels": 8,
+          "output_channels": 8,
+          "input_height": 9,
+          "input_width": 9,
+          "filter_height": 1,
+          "filter_width": 7,
+          "vertical_stride": 1,
+          "horizontal_stride": 1,
+          "conv_input_scale": 0.002,
+          "side_input_scale": 0.5,
+          "bias_scale": 1,
+          "padding_type": "SAME"
+      },
+  ]
+
+  def runTest(self, test_param):
+    batch_size = test_param["batch_size"]
+    input_channels = test_param["input_channels"]
+    output_channels = test_param["output_channels"]
+    input_height = test_param["input_height"]
+    input_width = test_param["input_width"]
+    filter_height = test_param["filter_height"]
+    filter_width = test_param["filter_width"]
+    vertical_stride = test_param["vertical_stride"]
+    horizontal_stride = test_param["horizontal_stride"]
+    conv_input_scale = test_param["conv_input_scale"]
+    side_input_scale = test_param["side_input_scale"]
+    bias_scale = test_param["bias_scale"]
+    padding_type = test_param["padding_type"]
+
+    conv_input, _, _ = gen_array_ops.quantize_v2(
+        random_ops.random_uniform(
+            [batch_size, input_channels // 4, input_height, input_width, 4],
+            minval=-0.0,
+            maxval=1.0,
+            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)
+
+    kernel, _, _ = gen_array_ops.quantize_v2(
+        random_ops.random_uniform(
+            [
+                output_channels, input_channels // 4, filter_height,
+                filter_width, 4
+            ],
+            minval=-1.0,
+            maxval=1.0,
+            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)
+
+    output_height = CalculateCovolvedOutputDim(input_height, filter_height,
+                                               vertical_stride, padding_type)
+    output_width = CalculateCovolvedOutputDim(input_width, filter_width,
+                                              horizontal_stride, padding_type)
+    print("output_height=", output_height, ", output_width=", output_width)
+
+    side_input, _, _ = gen_array_ops.quantize_v2(
+        random_ops.random_uniform(
+            [batch_size, output_channels // 4, output_height, output_width, 4],
+            minval=0.0,
+            maxval=1.0,
+            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)
+
+    biases = random_ops.random_uniform(
+        [output_channels],
+        minval=-10 * bias_scale,
+        maxval=20 * bias_scale,
+        dtype=dtypes.float32)
+
+    strides = [1, 1, vertical_stride, horizontal_stride]
+
+    actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
+        conv_input,
+        kernel,
+        biases,
+        strides=strides,
+        padding=padding_type,
+        conv_input_scale=conv_input_scale,
+        side_input_scale=side_input_scale,
+        side_input=side_input,
+        data_format="NCHW_VECT_C",
+        filter_format="OIHW_VECT_I")
+
+    expected = SimulateFusedConv2dBiasActivationInt8(
+        conv_input_scale, conv_input, kernel, padding_type, strides,
+        side_input_scale, side_input, biases)
+
+    with self.test_session(use_gpu=True) as sess:
+      actual_y, expected_y = sess.run([actual, expected])
+      print("actual_y = ", actual_y)
+      print("expected_y = ", expected_y)
+      self.assertTrue(np.array_equal(actual_y, expected_y))
+
+  def testFusedConvInt8(self):
+    if not test.is_gpu_available(
+        cuda_only=True, min_cuda_compute_capability=(6, 1)):
+      tf_logging.info("int8 test skipped because not run with --config=cuda or "
+                      "no GPUs with compute capability >= 6.1 are available.")
+      return
+    for test_param in self._test_params:
+      self.runTest(test_param)
+
+
 if __name__ == "__main__":
   for index, (input_size_, filter_size_, output_size_, stride_,
               padding_) in enumerate(GetShrunkInceptionShapes()):
diff --git a/tensorflow/contrib/keras/BUILD b/tensorflow/contrib/keras/BUILD
index 26f0e41518..7e0019ce4a 100644
--- a/tensorflow/contrib/keras/BUILD
+++ b/tensorflow/contrib/keras/BUILD
@@ -1,5 +1,6 @@
 # Description:
 #   Contains the Keras API (internal TensorFlow version).
+#   Note that tf.contrib.keras has been deprecated in favor of tf.keras.
 
 licenses(["notice"])  # Apache 2.0
 
@@ -7,9 +8,6 @@ exports_files(["LICENSE"])
 
 package(default_visibility = ["//tensorflow:__subpackages__"])
 
-load("//tensorflow:tensorflow.bzl", "cuda_py_test")
-load("//tensorflow:tensorflow.bzl", "py_test")
-
 py_library(
     name = "keras",
     srcs = [
@@ -48,641 +46,10 @@ py_library(
         "api/keras/utils/__init__.py",
         "api/keras/wrappers/__init__.py",
         "api/keras/wrappers/scikit_learn/__init__.py",
-        "python/keras/__init__.py",
-        "python/keras/activations.py",
-        "python/keras/applications/__init__.py",
-        "python/keras/applications/imagenet_utils.py",
-        "python/keras/applications/inception_v3.py",
-        "python/keras/applications/mobilenet.py",
-        "python/keras/applications/resnet50.py",
-        "python/keras/applications/vgg16.py",
-        "python/keras/applications/vgg19.py",
-        "python/keras/applications/xception.py",
-        "python/keras/backend.py",
-        "python/keras/callbacks.py",
-        "python/keras/constraints.py",
-        "python/keras/datasets/__init__.py",
-        "python/keras/datasets/boston_housing.py",
-        "python/keras/datasets/cifar.py",
-        "python/keras/datasets/cifar10.py",
-        "python/keras/datasets/cifar100.py",
-        "python/keras/datasets/imdb.py",
-        "python/keras/datasets/mnist.py",
-        "python/keras/datasets/reuters.py",
-        "python/keras/engine/__init__.py",
-        "python/keras/engine/topology.py",
-        "python/keras/engine/training.py",
-        "python/keras/initializers.py",
-        "python/keras/layers/__init__.py",
-        "python/keras/layers/advanced_activations.py",
-        "python/keras/layers/convolutional.py",
-        "python/keras/layers/convolutional_recurrent.py",
-        "python/keras/layers/core.py",
-        "python/keras/layers/embeddings.py",
-        "python/keras/layers/local.py",
-        "python/keras/layers/merge.py",
-        "python/keras/layers/noise.py",
-        "python/keras/layers/normalization.py",
-        "python/keras/layers/pooling.py",
-        "python/keras/layers/recurrent.py",
-        "python/keras/layers/serialization.py",
-        "python/keras/layers/wrappers.py",
-        "python/keras/losses.py",
-        "python/keras/metrics.py",
-        "python/keras/models.py",
-        "python/keras/optimizers.py",
-        "python/keras/preprocessing/__init__.py",
-        "python/keras/preprocessing/image.py",
-        "python/keras/preprocessing/sequence.py",
-        "python/keras/preprocessing/text.py",
-        "python/keras/regularizers.py",
-        "python/keras/testing_utils.py",
-        "python/keras/utils/__init__.py",
-        "python/keras/utils/conv_utils.py",
-        "python/keras/utils/data_utils.py",
-        "python/keras/utils/generic_utils.py",
-        "python/keras/utils/io_utils.py",
-        "python/keras/utils/layer_utils.py",
-        "python/keras/utils/np_utils.py",
-        "python/keras/utils/vis_utils.py",
-        "python/keras/wrappers/__init__.py",
-        "python/keras/wrappers/scikit_learn.py",
-    ],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/contrib/tensorboard:projector",
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:client",
-        "//tensorflow/python:clip_ops",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:ctc_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:functional_ops",
-        "//tensorflow/python:gradients",
-        "//tensorflow/python:image_ops",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:layers",
-        "//tensorflow/python:layers_base",
-        "//tensorflow/python:logging_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:nn",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:state_ops",
-        "//tensorflow/python:summary",
-        "//tensorflow/python:tensor_array_grad",
-        "//tensorflow/python:tensor_array_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:training",
-        "//tensorflow/python:util",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-        "@six_archive//:six",
-    ],
-)
-
-py_test(
-    name = "integration_test",
-    size = "medium",
-    srcs = ["python/keras/integration_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:layers",
-        "//tensorflow/python:nn",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "activations_test",
-    size = "small",
-    srcs = ["python/keras/activations_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "constraints_test",
-    size = "small",
-    srcs = ["python/keras/constraints_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "initializers_test",
-    size = "small",
-    srcs = ["python/keras/initializers_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:init_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "regularizers_test",
-    size = "small",
-    srcs = ["python/keras/regularizers_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "optimizers_test",
-    size = "medium",
-    srcs = ["python/keras/optimizers_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:training",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "losses_test",
-    size = "small",
-    srcs = ["python/keras/losses_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "metrics_test",
-    size = "small",
-    srcs = ["python/keras/metrics_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "inception_v3_test",
-    size = "medium",
-    srcs = ["python/keras/applications/inception_v3_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "mobilenet_test",
-    size = "medium",
-    srcs = ["python/keras/applications/mobilenet_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "resnet50_test",
-    size = "small",
-    srcs = ["python/keras/applications/resnet50_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "vgg16_test",
-    size = "small",
-    srcs = ["python/keras/applications/vgg16_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "vgg19_test",
-    size = "small",
-    srcs = ["python/keras/applications/vgg19_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "xception_test",
-    size = "medium",
-    srcs = ["python/keras/applications/xception_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "advanced_activations_test",
-    size = "small",
-    srcs = ["python/keras/layers/advanced_activations_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "convolutional_recurrent_test",
-    size = "medium",
-    srcs = ["python/keras/layers/convolutional_recurrent_test.py"],
-    shard_count = 2,
-    srcs_version = "PY2AND3",
-    tags = ["noasan"],  # times out b/63678675
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "convolutional_test",
-    size = "medium",
-    srcs = ["python/keras/layers/convolutional_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "manual",
-        "noasan",  # times out b/63678675
-        "notsan",
-    ],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "pooling_test",
-    size = "small",
-    srcs = ["python/keras/layers/pooling_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "core_test",
-    size = "small",
-    srcs = ["python/keras/layers/core_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "embeddings_test",
-    size = "small",
-    srcs = ["python/keras/layers/embeddings_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "local_test",
-    size = "medium",
-    srcs = ["python/keras/layers/local_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "merge_test",
-    size = "small",
-    srcs = ["python/keras/layers/merge_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "noise_test",
-    size = "small",
-    srcs = ["python/keras/layers/noise_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "normalization_test",
-    size = "small",
-    srcs = ["python/keras/layers/normalization_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "simplernn_test",
-    size = "medium",
-    srcs = ["python/keras/layers/simplernn_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "gru_test",
-    size = "medium",
-    srcs = ["python/keras/layers/gru_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],  # http://b/62136390
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "lstm_test",
-    size = "medium",
-    srcs = ["python/keras/layers/lstm_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "noasan",  # times out b/63678675
-        "notsan",  # http://b/62189182
-    ],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "serialization_test",
-    size = "small",
-    srcs = ["python/keras/layers/serialization_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "wrappers_test",
-    size = "small",
-    srcs = ["python/keras/layers/wrappers_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "scikit_learn_test",
-    size = "small",
-    srcs = ["python/keras/wrappers/scikit_learn_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "data_utils_test",
-    size = "small",
-    srcs = ["python/keras/utils/data_utils_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "noasan",  # times out
-        "notsan",
-    ],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "generic_utils_test",
-    size = "small",
-    srcs = ["python/keras/utils/generic_utils_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-    ],
-)
-
-py_test(
-    name = "io_utils_test",
-    size = "small",
-    srcs = ["python/keras/utils/io_utils_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "imagenet_utils_test",
-    size = "small",
-    srcs = ["python/keras/applications/imagenet_utils_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "image_test",
-    size = "medium",
-    srcs = ["python/keras/preprocessing/image_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "sequence_test",
-    size = "small",
-    srcs = ["python/keras/preprocessing/sequence_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "text_test",
-    size = "small",
-    srcs = ["python/keras/preprocessing/text_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "callbacks_test",
-    size = "medium",
-    srcs = ["python/keras/callbacks_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "training_test",
-    size = "medium",
-    srcs = ["python/keras/engine/training_test.py"],
-    srcs_version = "PY2AND3",
-    tags = ["notsan"],
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "topology_test",
-    size = "small",
-    srcs = ["python/keras/engine/topology_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "models_test",
-    size = "small",
-    srcs = ["python/keras/models_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:training",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_test(
-    name = "backend_test",
-    size = "small",
-    srcs = ["python/keras/backend_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":keras",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:util",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "testing_utils",
-    srcs = [
-        "python/keras/testing_utils.py",
     ],
     srcs_version = "PY2AND3",
     deps = [
-        ":keras",
-        "//tensorflow/python:util",
-        "//third_party/py/numpy",
+        "//tensorflow/python/keras",
     ],
 )
 
diff --git a/tensorflow/contrib/keras/README.md b/tensorflow/contrib/keras/README.md
index db2556fe42..de4c81268d 100644
--- a/tensorflow/contrib/keras/README.md
+++ b/tensorflow/contrib/keras/README.md
@@ -1,3 +1,6 @@
+NOTE: THE `tensorflow.contrib.keras` MODULE HAS BEEN DEPRECATED.
+USE INSTEAD `tensorflow.keras`, PART OF CORE TENSORFLOW.
+
 Keras is an object-oriented API for defining and training neural networks.
 
 This module contains a pure-TensorFlow implementation of the Keras API,
diff --git a/tensorflow/contrib/keras/api/keras/activations/__init__.py b/tensorflow/contrib/keras/api/keras/activations/__init__.py
index af6f249e71..d04838c218 100644
--- a/tensorflow/contrib/keras/api/keras/activations/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/activations/__init__.py
@@ -19,22 +19,22 @@ from __future__ import division
 from __future__ import print_function
 
 # Activation functions.
-from tensorflow.contrib.keras.python.keras.activations import elu
-from tensorflow.contrib.keras.python.keras.activations import hard_sigmoid
-from tensorflow.contrib.keras.python.keras.activations import linear
-from tensorflow.contrib.keras.python.keras.activations import relu
-from tensorflow.contrib.keras.python.keras.activations import selu
-from tensorflow.contrib.keras.python.keras.activations import sigmoid
-from tensorflow.contrib.keras.python.keras.activations import softmax
-from tensorflow.contrib.keras.python.keras.activations import softplus
-from tensorflow.contrib.keras.python.keras.activations import softsign
-from tensorflow.contrib.keras.python.keras.activations import tanh
+from tensorflow.python.keras._impl.keras.activations import elu
+from tensorflow.python.keras._impl.keras.activations import hard_sigmoid
+from tensorflow.python.keras._impl.keras.activations import linear
+from tensorflow.python.keras._impl.keras.activations import relu
+from tensorflow.python.keras._impl.keras.activations import selu
+from tensorflow.python.keras._impl.keras.activations import sigmoid
+from tensorflow.python.keras._impl.keras.activations import softmax
+from tensorflow.python.keras._impl.keras.activations import softplus
+from tensorflow.python.keras._impl.keras.activations import softsign
+from tensorflow.python.keras._impl.keras.activations import tanh
 
 # Auxiliary utils.
 # pylint: disable=g-bad-import-order
-from tensorflow.contrib.keras.python.keras.activations import deserialize
-from tensorflow.contrib.keras.python.keras.activations import serialize
-from tensorflow.contrib.keras.python.keras.activations import get
+from tensorflow.python.keras._impl.keras.activations import deserialize
+from tensorflow.python.keras._impl.keras.activations import serialize
+from tensorflow.python.keras._impl.keras.activations import get
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/applications/inception_v3/__init__.py b/tensorflow/contrib/keras/api/keras/applications/inception_v3/__init__.py
index d8ca73fb97..abf8393ae4 100644
--- a/tensorflow/contrib/keras/api/keras/applications/inception_v3/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/applications/inception_v3/__init__.py
@@ -18,9 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.applications.inception_v3 import decode_predictions
-from tensorflow.contrib.keras.python.keras.applications.inception_v3 import InceptionV3
-from tensorflow.contrib.keras.python.keras.applications.inception_v3 import preprocess_input
+from tensorflow.python.keras._impl.keras.applications.inception_v3 import decode_predictions
+from tensorflow.python.keras._impl.keras.applications.inception_v3 import InceptionV3
+from tensorflow.python.keras._impl.keras.applications.inception_v3 import preprocess_input
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/applications/mobilenet/__init__.py b/tensorflow/contrib/keras/api/keras/applications/mobilenet/__init__.py
index 594861fb51..b809e91193 100644
--- a/tensorflow/contrib/keras/api/keras/applications/mobilenet/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/applications/mobilenet/__init__.py
@@ -18,9 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.applications.mobilenet import decode_predictions
-from tensorflow.contrib.keras.python.keras.applications.mobilenet import MobileNet
-from tensorflow.contrib.keras.python.keras.applications.mobilenet import preprocess_input
+from tensorflow.python.keras._impl.keras.applications.mobilenet import decode_predictions
+from tensorflow.python.keras._impl.keras.applications.mobilenet import MobileNet
+from tensorflow.python.keras._impl.keras.applications.mobilenet import preprocess_input
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/applications/resnet50/__init__.py b/tensorflow/contrib/keras/api/keras/applications/resnet50/__init__.py
index e9b25b66d5..530805d150 100644
--- a/tensorflow/contrib/keras/api/keras/applications/resnet50/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/applications/resnet50/__init__.py
@@ -18,9 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.applications.resnet50 import decode_predictions
-from tensorflow.contrib.keras.python.keras.applications.resnet50 import preprocess_input
-from tensorflow.contrib.keras.python.keras.applications.resnet50 import ResNet50
+from tensorflow.python.keras._impl.keras.applications.resnet50 import decode_predictions
+from tensorflow.python.keras._impl.keras.applications.resnet50 import preprocess_input
+from tensorflow.python.keras._impl.keras.applications.resnet50 import ResNet50
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/applications/vgg16/__init__.py b/tensorflow/contrib/keras/api/keras/applications/vgg16/__init__.py
index 2a1f789cc5..118361604b 100644
--- a/tensorflow/contrib/keras/api/keras/applications/vgg16/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/applications/vgg16/__init__.py
@@ -18,9 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.applications.vgg16 import decode_predictions
-from tensorflow.contrib.keras.python.keras.applications.vgg16 import preprocess_input
-from tensorflow.contrib.keras.python.keras.applications.vgg16 import VGG16
+from tensorflow.python.keras._impl.keras.applications.vgg16 import decode_predictions
+from tensorflow.python.keras._impl.keras.applications.vgg16 import preprocess_input
+from tensorflow.python.keras._impl.keras.applications.vgg16 import VGG16
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/applications/vgg19/__init__.py b/tensorflow/contrib/keras/api/keras/applications/vgg19/__init__.py
index 22b5e7c8e4..cda52628f3 100644
--- a/tensorflow/contrib/keras/api/keras/applications/vgg19/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/applications/vgg19/__init__.py
@@ -18,9 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.applications.vgg19 import decode_predictions
-from tensorflow.contrib.keras.python.keras.applications.vgg19 import preprocess_input
-from tensorflow.contrib.keras.python.keras.applications.vgg19 import VGG19
+from tensorflow.python.keras._impl.keras.applications.vgg19 import decode_predictions
+from tensorflow.python.keras._impl.keras.applications.vgg19 import preprocess_input
+from tensorflow.python.keras._impl.keras.applications.vgg19 import VGG19
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/applications/xception/__init__.py b/tensorflow/contrib/keras/api/keras/applications/xception/__init__.py
index 23d1b6a0b3..ae9cd9cd18 100644
--- a/tensorflow/contrib/keras/api/keras/applications/xception/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/applications/xception/__init__.py
@@ -18,9 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.applications.xception import decode_predictions
-from tensorflow.contrib.keras.python.keras.applications.xception import preprocess_input
-from tensorflow.contrib.keras.python.keras.applications.xception import Xception
+from tensorflow.python.keras._impl.keras.applications.xception import decode_predictions
+from tensorflow.python.keras._impl.keras.applications.xception import preprocess_input
+from tensorflow.python.keras._impl.keras.applications.xception import Xception
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/backend/__init__.py b/tensorflow/contrib/keras/api/keras/backend/__init__.py
index f3721a8dcb..10ef5a7585 100644
--- a/tensorflow/contrib/keras/api/keras/backend/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/backend/__init__.py
@@ -19,144 +19,144 @@ from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=redefined-builtin
-from tensorflow.contrib.keras.python.keras.backend import abs
-from tensorflow.contrib.keras.python.keras.backend import all
-from tensorflow.contrib.keras.python.keras.backend import any
-from tensorflow.contrib.keras.python.keras.backend import arange
-from tensorflow.contrib.keras.python.keras.backend import argmax
-from tensorflow.contrib.keras.python.keras.backend import argmin
-from tensorflow.contrib.keras.python.keras.backend import backend
-from tensorflow.contrib.keras.python.keras.backend import batch_dot
-from tensorflow.contrib.keras.python.keras.backend import batch_flatten
-from tensorflow.contrib.keras.python.keras.backend import batch_get_value
-from tensorflow.contrib.keras.python.keras.backend import batch_normalization
-from tensorflow.contrib.keras.python.keras.backend import batch_set_value
-from tensorflow.contrib.keras.python.keras.backend import bias_add
-from tensorflow.contrib.keras.python.keras.backend import binary_crossentropy
-from tensorflow.contrib.keras.python.keras.backend import cast
-from tensorflow.contrib.keras.python.keras.backend import cast_to_floatx
-from tensorflow.contrib.keras.python.keras.backend import categorical_crossentropy
-from tensorflow.contrib.keras.python.keras.backend import clear_session
-from tensorflow.contrib.keras.python.keras.backend import clip
-from tensorflow.contrib.keras.python.keras.backend import concatenate
-from tensorflow.contrib.keras.python.keras.backend import constant
-from tensorflow.contrib.keras.python.keras.backend import conv1d
-from tensorflow.contrib.keras.python.keras.backend import conv2d
-from tensorflow.contrib.keras.python.keras.backend import conv2d_transpose
-from tensorflow.contrib.keras.python.keras.backend import conv3d
-from tensorflow.contrib.keras.python.keras.backend import cos
-from tensorflow.contrib.keras.python.keras.backend import count_params
-from tensorflow.contrib.keras.python.keras.backend import ctc_batch_cost
-from tensorflow.contrib.keras.python.keras.backend import ctc_decode
-from tensorflow.contrib.keras.python.keras.backend import ctc_label_dense_to_sparse
-from tensorflow.contrib.keras.python.keras.backend import dot
-from tensorflow.contrib.keras.python.keras.backend import dropout
-from tensorflow.contrib.keras.python.keras.backend import dtype
-from tensorflow.contrib.keras.python.keras.backend import elu
-from tensorflow.contrib.keras.python.keras.backend import epsilon
-from tensorflow.contrib.keras.python.keras.backend import equal
-from tensorflow.contrib.keras.python.keras.backend import eval
-from tensorflow.contrib.keras.python.keras.backend import exp
-from tensorflow.contrib.keras.python.keras.backend import expand_dims
-from tensorflow.contrib.keras.python.keras.backend import eye
-from tensorflow.contrib.keras.python.keras.backend import flatten
-from tensorflow.contrib.keras.python.keras.backend import floatx
-from tensorflow.contrib.keras.python.keras.backend import foldl
-from tensorflow.contrib.keras.python.keras.backend import foldr
-from tensorflow.contrib.keras.python.keras.backend import function
-from tensorflow.contrib.keras.python.keras.backend import gather
-from tensorflow.contrib.keras.python.keras.backend import get_session
-from tensorflow.contrib.keras.python.keras.backend import get_uid
-from tensorflow.contrib.keras.python.keras.backend import get_value
-from tensorflow.contrib.keras.python.keras.backend import gradients
-from tensorflow.contrib.keras.python.keras.backend import greater
-from tensorflow.contrib.keras.python.keras.backend import greater_equal
-from tensorflow.contrib.keras.python.keras.backend import hard_sigmoid
-from tensorflow.contrib.keras.python.keras.backend import image_data_format
-from tensorflow.contrib.keras.python.keras.backend import in_test_phase
-from tensorflow.contrib.keras.python.keras.backend import in_top_k
-from tensorflow.contrib.keras.python.keras.backend import in_train_phase
-from tensorflow.contrib.keras.python.keras.backend import int_shape
-from tensorflow.contrib.keras.python.keras.backend import is_sparse
-from tensorflow.contrib.keras.python.keras.backend import l2_normalize
-from tensorflow.contrib.keras.python.keras.backend import learning_phase
-from tensorflow.contrib.keras.python.keras.backend import less
-from tensorflow.contrib.keras.python.keras.backend import less_equal
-from tensorflow.contrib.keras.python.keras.backend import log
-from tensorflow.contrib.keras.python.keras.backend import manual_variable_initialization
-from tensorflow.contrib.keras.python.keras.backend import map_fn
-from tensorflow.contrib.keras.python.keras.backend import max
-from tensorflow.contrib.keras.python.keras.backend import maximum
-from tensorflow.contrib.keras.python.keras.backend import mean
-from tensorflow.contrib.keras.python.keras.backend import min
-from tensorflow.contrib.keras.python.keras.backend import minimum
-from tensorflow.contrib.keras.python.keras.backend import moving_average_update
-from tensorflow.contrib.keras.python.keras.backend import name_scope
-from tensorflow.contrib.keras.python.keras.backend import ndim
-from tensorflow.contrib.keras.python.keras.backend import normalize_batch_in_training
-from tensorflow.contrib.keras.python.keras.backend import not_equal
-from tensorflow.contrib.keras.python.keras.backend import one_hot
-from tensorflow.contrib.keras.python.keras.backend import ones
-from tensorflow.contrib.keras.python.keras.backend import ones_like
-from tensorflow.contrib.keras.python.keras.backend import permute_dimensions
-from tensorflow.contrib.keras.python.keras.backend import placeholder
-from tensorflow.contrib.keras.python.keras.backend import pool2d
-from tensorflow.contrib.keras.python.keras.backend import pool3d
-from tensorflow.contrib.keras.python.keras.backend import pow
-from tensorflow.contrib.keras.python.keras.backend import print_tensor
-from tensorflow.contrib.keras.python.keras.backend import prod
-from tensorflow.contrib.keras.python.keras.backend import random_binomial
-from tensorflow.contrib.keras.python.keras.backend import random_normal
-from tensorflow.contrib.keras.python.keras.backend import random_normal_variable
-from tensorflow.contrib.keras.python.keras.backend import random_uniform
-from tensorflow.contrib.keras.python.keras.backend import random_uniform_variable
-from tensorflow.contrib.keras.python.keras.backend import relu
-from tensorflow.contrib.keras.python.keras.backend import repeat
-from tensorflow.contrib.keras.python.keras.backend import repeat_elements
-from tensorflow.contrib.keras.python.keras.backend import reset_uids
-from tensorflow.contrib.keras.python.keras.backend import reshape
-from tensorflow.contrib.keras.python.keras.backend import resize_images
-from tensorflow.contrib.keras.python.keras.backend import resize_volumes
-from tensorflow.contrib.keras.python.keras.backend import reverse
-from tensorflow.contrib.keras.python.keras.backend import rnn
-from tensorflow.contrib.keras.python.keras.backend import round
-from tensorflow.contrib.keras.python.keras.backend import separable_conv2d
-from tensorflow.contrib.keras.python.keras.backend import set_epsilon
-from tensorflow.contrib.keras.python.keras.backend import set_floatx
-from tensorflow.contrib.keras.python.keras.backend import set_image_data_format
-from tensorflow.contrib.keras.python.keras.backend import set_learning_phase
-from tensorflow.contrib.keras.python.keras.backend import set_session
-from tensorflow.contrib.keras.python.keras.backend import set_value
-from tensorflow.contrib.keras.python.keras.backend import shape
-from tensorflow.contrib.keras.python.keras.backend import sigmoid
-from tensorflow.contrib.keras.python.keras.backend import sign
-from tensorflow.contrib.keras.python.keras.backend import sin
-from tensorflow.contrib.keras.python.keras.backend import softmax
-from tensorflow.contrib.keras.python.keras.backend import softplus
-from tensorflow.contrib.keras.python.keras.backend import softsign
-from tensorflow.contrib.keras.python.keras.backend import sparse_categorical_crossentropy
-from tensorflow.contrib.keras.python.keras.backend import spatial_2d_padding
-from tensorflow.contrib.keras.python.keras.backend import spatial_3d_padding
-from tensorflow.contrib.keras.python.keras.backend import sqrt
-from tensorflow.contrib.keras.python.keras.backend import square
-from tensorflow.contrib.keras.python.keras.backend import squeeze
-from tensorflow.contrib.keras.python.keras.backend import stack
-from tensorflow.contrib.keras.python.keras.backend import std
-from tensorflow.contrib.keras.python.keras.backend import stop_gradient
-from tensorflow.contrib.keras.python.keras.backend import sum
-from tensorflow.contrib.keras.python.keras.backend import switch
-from tensorflow.contrib.keras.python.keras.backend import tanh
-from tensorflow.contrib.keras.python.keras.backend import temporal_padding
-from tensorflow.contrib.keras.python.keras.backend import to_dense
-from tensorflow.contrib.keras.python.keras.backend import transpose
-from tensorflow.contrib.keras.python.keras.backend import truncated_normal
-from tensorflow.contrib.keras.python.keras.backend import update
-from tensorflow.contrib.keras.python.keras.backend import update_add
-from tensorflow.contrib.keras.python.keras.backend import update_sub
-from tensorflow.contrib.keras.python.keras.backend import var
-from tensorflow.contrib.keras.python.keras.backend import variable
-from tensorflow.contrib.keras.python.keras.backend import zeros
-from tensorflow.contrib.keras.python.keras.backend import zeros_like
+from tensorflow.python.keras._impl.keras.backend import abs
+from tensorflow.python.keras._impl.keras.backend import all
+from tensorflow.python.keras._impl.keras.backend import any
+from tensorflow.python.keras._impl.keras.backend import arange
+from tensorflow.python.keras._impl.keras.backend import argmax
+from tensorflow.python.keras._impl.keras.backend import argmin
+from tensorflow.python.keras._impl.keras.backend import backend
+from tensorflow.python.keras._impl.keras.backend import batch_dot
+from tensorflow.python.keras._impl.keras.backend import batch_flatten
+from tensorflow.python.keras._impl.keras.backend import batch_get_value
+from tensorflow.python.keras._impl.keras.backend import batch_normalization
+from tensorflow.python.keras._impl.keras.backend import batch_set_value
+from tensorflow.python.keras._impl.keras.backend import bias_add
+from tensorflow.python.keras._impl.keras.backend import binary_crossentropy
+from tensorflow.python.keras._impl.keras.backend import cast
+from tensorflow.python.keras._impl.keras.backend import cast_to_floatx
+from tensorflow.python.keras._impl.keras.backend import categorical_crossentropy
+from tensorflow.python.keras._impl.keras.backend import clear_session
+from tensorflow.python.keras._impl.keras.backend import clip
+from tensorflow.python.keras._impl.keras.backend import concatenate
+from tensorflow.python.keras._impl.keras.backend import constant
+from tensorflow.python.keras._impl.keras.backend import conv1d
+from tensorflow.python.keras._impl.keras.backend import conv2d
+from tensorflow.python.keras._impl.keras.backend import conv2d_transpose
+from tensorflow.python.keras._impl.keras.backend import conv3d
+from tensorflow.python.keras._impl.keras.backend import cos
+from tensorflow.python.keras._impl.keras.backend import count_params
+from tensorflow.python.keras._impl.keras.backend import ctc_batch_cost
+from tensorflow.python.keras._impl.keras.backend import ctc_decode
+from tensorflow.python.keras._impl.keras.backend import ctc_label_dense_to_sparse
+from tensorflow.python.keras._impl.keras.backend import dot
+from tensorflow.python.keras._impl.keras.backend import dropout
+from tensorflow.python.keras._impl.keras.backend import dtype
+from tensorflow.python.keras._impl.keras.backend import elu
+from tensorflow.python.keras._impl.keras.backend import epsilon
+from tensorflow.python.keras._impl.keras.backend import equal
+from tensorflow.python.keras._impl.keras.backend import eval
+from tensorflow.python.keras._impl.keras.backend import exp
+from tensorflow.python.keras._impl.keras.backend import expand_dims
+from tensorflow.python.keras._impl.keras.backend import eye
+from tensorflow.python.keras._impl.keras.backend import flatten
+from tensorflow.python.keras._impl.keras.backend import floatx
+from tensorflow.python.keras._impl.keras.backend import foldl
+from tensorflow.python.keras._impl.keras.backend import foldr
+from tensorflow.python.keras._impl.keras.backend import function
+from tensorflow.python.keras._impl.keras.backend import gather
+from tensorflow.python.keras._impl.keras.backend import get_session
+from tensorflow.python.keras._impl.keras.backend import get_uid
+from tensorflow.python.keras._impl.keras.backend import get_value
+from tensorflow.python.keras._impl.keras.backend import gradients
+from tensorflow.python.keras._impl.keras.backend import greater
+from tensorflow.python.keras._impl.keras.backend import greater_equal
+from tensorflow.python.keras._impl.keras.backend import hard_sigmoid
+from tensorflow.python.keras._impl.keras.backend import image_data_format
+from tensorflow.python.keras._impl.keras.backend import in_test_phase
+from tensorflow.python.keras._impl.keras.backend import in_top_k
+from tensorflow.python.keras._impl.keras.backend import in_train_phase
+from tensorflow.python.keras._impl.keras.backend import int_shape
+from tensorflow.python.keras._impl.keras.backend import is_sparse
+from tensorflow.python.keras._impl.keras.backend import l2_normalize
+from tensorflow.python.keras._impl.keras.backend import learning_phase
+from tensorflow.python.keras._impl.keras.backend import less
+from tensorflow.python.keras._impl.keras.backend import less_equal
+from tensorflow.python.keras._impl.keras.backend import log
+from tensorflow.python.keras._impl.keras.backend import manual_variable_initialization
+from tensorflow.python.keras._impl.keras.backend import map_fn
+from tensorflow.python.keras._impl.keras.backend import max
+from tensorflow.python.keras._impl.keras.backend import maximum
+from tensorflow.python.keras._impl.keras.backend import mean
+from tensorflow.python.keras._impl.keras.backend import min
+from tensorflow.python.keras._impl.keras.backend import minimum
+from tensorflow.python.keras._impl.keras.backend import moving_average_update
+from tensorflow.python.keras._impl.keras.backend import name_scope
+from tensorflow.python.keras._impl.keras.backend import ndim
+from tensorflow.python.keras._impl.keras.backend import normalize_batch_in_training
+from tensorflow.python.keras._impl.keras.backend import not_equal
+from tensorflow.python.keras._impl.keras.backend import one_hot
+from tensorflow.python.keras._impl.keras.backend import ones
+from tensorflow.python.keras._impl.keras.backend import ones_like
+from tensorflow.python.keras._impl.keras.backend import permute_dimensions
+from tensorflow.python.keras._impl.keras.backend import placeholder
+from tensorflow.python.keras._impl.keras.backend import pool2d
+from tensorflow.python.keras._impl.keras.backend import pool3d
+from tensorflow.python.keras._impl.keras.backend import pow
+from tensorflow.python.keras._impl.keras.backend import print_tensor
+from tensorflow.python.keras._impl.keras.backend import prod
+from tensorflow.python.keras._impl.keras.backend import random_binomial
+from tensorflow.python.keras._impl.keras.backend import random_normal
+from tensorflow.python.keras._impl.keras.backend import random_normal_variable
+from tensorflow.python.keras._impl.keras.backend import random_uniform
+from tensorflow.python.keras._impl.keras.backend import random_uniform_variable
+from tensorflow.python.keras._impl.keras.backend import relu
+from tensorflow.python.keras._impl.keras.backend import repeat
+from tensorflow.python.keras._impl.keras.backend import repeat_elements
+from tensorflow.python.keras._impl.keras.backend import reset_uids
+from tensorflow.python.keras._impl.keras.backend import reshape
+from tensorflow.python.keras._impl.keras.backend import resize_images
+from tensorflow.python.keras._impl.keras.backend import resize_volumes
+from tensorflow.python.keras._impl.keras.backend import reverse
+from tensorflow.python.keras._impl.keras.backend import rnn
+from tensorflow.python.keras._impl.keras.backend import round
+from tensorflow.python.keras._impl.keras.backend import separable_conv2d
+from tensorflow.python.keras._impl.keras.backend import set_epsilon
+from tensorflow.python.keras._impl.keras.backend import set_floatx
+from tensorflow.python.keras._impl.keras.backend import set_image_data_format
+from tensorflow.python.keras._impl.keras.backend import set_learning_phase
+from tensorflow.python.keras._impl.keras.backend import set_session
+from tensorflow.python.keras._impl.keras.backend import set_value
+from tensorflow.python.keras._impl.keras.backend import shape
+from tensorflow.python.keras._impl.keras.backend import sigmoid
+from tensorflow.python.keras._impl.keras.backend import sign
+from tensorflow.python.keras._impl.keras.backend import sin
+from tensorflow.python.keras._impl.keras.backend import softmax
+from tensorflow.python.keras._impl.keras.backend import softplus
+from tensorflow.python.keras._impl.keras.backend import softsign
+from tensorflow.python.keras._impl.keras.backend import sparse_categorical_crossentropy
+from tensorflow.python.keras._impl.keras.backend import spatial_2d_padding
+from tensorflow.python.keras._impl.keras.backend import spatial_3d_padding
+from tensorflow.python.keras._impl.keras.backend import sqrt
+from tensorflow.python.keras._impl.keras.backend import square
+from tensorflow.python.keras._impl.keras.backend import squeeze
+from tensorflow.python.keras._impl.keras.backend import stack
+from tensorflow.python.keras._impl.keras.backend import std
+from tensorflow.python.keras._impl.keras.backend import stop_gradient
+from tensorflow.python.keras._impl.keras.backend import sum
+from tensorflow.python.keras._impl.keras.backend import switch
+from tensorflow.python.keras._impl.keras.backend import tanh
+from tensorflow.python.keras._impl.keras.backend import temporal_padding
+from tensorflow.python.keras._impl.keras.backend import to_dense
+from tensorflow.python.keras._impl.keras.backend import transpose
+from tensorflow.python.keras._impl.keras.backend import truncated_normal
+from tensorflow.python.keras._impl.keras.backend import update
+from tensorflow.python.keras._impl.keras.backend import update_add
+from tensorflow.python.keras._impl.keras.backend import update_sub
+from tensorflow.python.keras._impl.keras.backend import var
+from tensorflow.python.keras._impl.keras.backend import variable
+from tensorflow.python.keras._impl.keras.backend import zeros
+from tensorflow.python.keras._impl.keras.backend import zeros_like
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/callbacks/__init__.py b/tensorflow/contrib/keras/api/keras/callbacks/__init__.py
index 3a97074857..2d884790dd 100644
--- a/tensorflow/contrib/keras/api/keras/callbacks/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/callbacks/__init__.py
@@ -18,19 +18,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.callbacks import BaseLogger
-from tensorflow.contrib.keras.python.keras.callbacks import Callback
-from tensorflow.contrib.keras.python.keras.callbacks import CSVLogger
-from tensorflow.contrib.keras.python.keras.callbacks import EarlyStopping
-from tensorflow.contrib.keras.python.keras.callbacks import History
-from tensorflow.contrib.keras.python.keras.callbacks import LambdaCallback
-from tensorflow.contrib.keras.python.keras.callbacks import LearningRateScheduler
-from tensorflow.contrib.keras.python.keras.callbacks import ModelCheckpoint
-from tensorflow.contrib.keras.python.keras.callbacks import ProgbarLogger
-from tensorflow.contrib.keras.python.keras.callbacks import ReduceLROnPlateau
-from tensorflow.contrib.keras.python.keras.callbacks import RemoteMonitor
-from tensorflow.contrib.keras.python.keras.callbacks import TensorBoard
-from tensorflow.contrib.keras.python.keras.callbacks import TerminateOnNaN
+from tensorflow.python.keras._impl.keras.callbacks import BaseLogger
+from tensorflow.python.keras._impl.keras.callbacks import Callback
+from tensorflow.python.keras._impl.keras.callbacks import CSVLogger
+from tensorflow.python.keras._impl.keras.callbacks import EarlyStopping
+from tensorflow.python.keras._impl.keras.callbacks import History
+from tensorflow.python.keras._impl.keras.callbacks import LambdaCallback
+from tensorflow.python.keras._impl.keras.callbacks import LearningRateScheduler
+from tensorflow.python.keras._impl.keras.callbacks import ModelCheckpoint
+from tensorflow.python.keras._impl.keras.callbacks import ProgbarLogger
+from tensorflow.python.keras._impl.keras.callbacks import ReduceLROnPlateau
+from tensorflow.python.keras._impl.keras.callbacks import RemoteMonitor
+from tensorflow.python.keras._impl.keras.callbacks import TensorBoard
+from tensorflow.python.keras._impl.keras.callbacks import TerminateOnNaN
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/constraints/__init__.py b/tensorflow/contrib/keras/api/keras/constraints/__init__.py
index 6b9e3bf46e..152606d8eb 100644
--- a/tensorflow/contrib/keras/api/keras/constraints/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/constraints/__init__.py
@@ -19,21 +19,21 @@ from __future__ import division
 from __future__ import print_function
 
 # Constraints functions / callable classes.
-from tensorflow.contrib.keras.python.keras.constraints import Constraint
-from tensorflow.contrib.keras.python.keras.constraints import max_norm
-from tensorflow.contrib.keras.python.keras.constraints import MaxNorm
-from tensorflow.contrib.keras.python.keras.constraints import min_max_norm
-from tensorflow.contrib.keras.python.keras.constraints import MinMaxNorm
-from tensorflow.contrib.keras.python.keras.constraints import non_neg
-from tensorflow.contrib.keras.python.keras.constraints import NonNeg
-from tensorflow.contrib.keras.python.keras.constraints import unit_norm
-from tensorflow.contrib.keras.python.keras.constraints import UnitNorm
+from tensorflow.python.keras._impl.keras.constraints import Constraint
+from tensorflow.python.keras._impl.keras.constraints import max_norm
+from tensorflow.python.keras._impl.keras.constraints import MaxNorm
+from tensorflow.python.keras._impl.keras.constraints import min_max_norm
+from tensorflow.python.keras._impl.keras.constraints import MinMaxNorm
+from tensorflow.python.keras._impl.keras.constraints import non_neg
+from tensorflow.python.keras._impl.keras.constraints import NonNeg
+from tensorflow.python.keras._impl.keras.constraints import unit_norm
+from tensorflow.python.keras._impl.keras.constraints import UnitNorm
 
 # Auxiliary utils.
 # pylint: disable=g-bad-import-order
-from tensorflow.contrib.keras.python.keras.constraints import deserialize
-from tensorflow.contrib.keras.python.keras.constraints import serialize
-from tensorflow.contrib.keras.python.keras.constraints import get
+from tensorflow.python.keras._impl.keras.constraints import deserialize
+from tensorflow.python.keras._impl.keras.constraints import serialize
+from tensorflow.python.keras._impl.keras.constraints import get
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/datasets/boston_housing/__init__.py b/tensorflow/contrib/keras/api/keras/datasets/boston_housing/__init__.py
index 0bfd3df540..b5371a03fd 100644
--- a/tensorflow/contrib/keras/api/keras/datasets/boston_housing/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/datasets/boston_housing/__init__.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.datasets.boston_housing import load_data
+from tensorflow.python.keras._impl.keras.datasets.boston_housing import load_data
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/datasets/cifar10/__init__.py b/tensorflow/contrib/keras/api/keras/datasets/cifar10/__init__.py
index f5fac6982a..68d3eb789e 100644
--- a/tensorflow/contrib/keras/api/keras/datasets/cifar10/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/datasets/cifar10/__init__.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.datasets.cifar10 import load_data
+from tensorflow.python.keras._impl.keras.datasets.cifar10 import load_data
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/datasets/cifar100/__init__.py b/tensorflow/contrib/keras/api/keras/datasets/cifar100/__init__.py
index a7e6996136..ca93742673 100644
--- a/tensorflow/contrib/keras/api/keras/datasets/cifar100/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/datasets/cifar100/__init__.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.datasets.cifar100 import load_data
+from tensorflow.python.keras._impl.keras.datasets.cifar100 import load_data
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/datasets/imdb/__init__.py b/tensorflow/contrib/keras/api/keras/datasets/imdb/__init__.py
index f141c8a8e9..1c6396d2d3 100644
--- a/tensorflow/contrib/keras/api/keras/datasets/imdb/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/datasets/imdb/__init__.py
@@ -18,8 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.datasets.imdb import get_word_index
-from tensorflow.contrib.keras.python.keras.datasets.imdb import load_data
+from tensorflow.python.keras._impl.keras.datasets.imdb import get_word_index
+from tensorflow.python.keras._impl.keras.datasets.imdb import load_data
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/datasets/mnist/__init__.py b/tensorflow/contrib/keras/api/keras/datasets/mnist/__init__.py
index 50b74f149c..364255f338 100644
--- a/tensorflow/contrib/keras/api/keras/datasets/mnist/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/datasets/mnist/__init__.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.datasets.mnist import load_data
+from tensorflow.python.keras._impl.keras.datasets.mnist import load_data
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/datasets/reuters/__init__.py b/tensorflow/contrib/keras/api/keras/datasets/reuters/__init__.py
index fc7f1235a3..bb6791a344 100644
--- a/tensorflow/contrib/keras/api/keras/datasets/reuters/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/datasets/reuters/__init__.py
@@ -18,8 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.datasets.reuters import get_word_index
-from tensorflow.contrib.keras.python.keras.datasets.reuters import load_data
+from tensorflow.python.keras._impl.keras.datasets.reuters import get_word_index
+from tensorflow.python.keras._impl.keras.datasets.reuters import load_data
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/initializers/__init__.py b/tensorflow/contrib/keras/api/keras/initializers/__init__.py
index 9b58723ed5..6b1fcfd2d9 100644
--- a/tensorflow/contrib/keras/api/keras/initializers/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/initializers/__init__.py
@@ -19,30 +19,30 @@ from __future__ import division
 from __future__ import print_function
 
 # Initializer functions / callable classes.
-from tensorflow.contrib.keras.python.keras.initializers import Constant
-from tensorflow.contrib.keras.python.keras.initializers import Identity
-from tensorflow.contrib.keras.python.keras.initializers import Initializer
-from tensorflow.contrib.keras.python.keras.initializers import Ones
-from tensorflow.contrib.keras.python.keras.initializers import Orthogonal
-from tensorflow.contrib.keras.python.keras.initializers import RandomNormal
-from tensorflow.contrib.keras.python.keras.initializers import RandomUniform
-from tensorflow.contrib.keras.python.keras.initializers import TruncatedNormal
-from tensorflow.contrib.keras.python.keras.initializers import VarianceScaling
-from tensorflow.contrib.keras.python.keras.initializers import Zeros
+from tensorflow.python.keras._impl.keras.initializers import Constant
+from tensorflow.python.keras._impl.keras.initializers import Identity
+from tensorflow.python.keras._impl.keras.initializers import Initializer
+from tensorflow.python.keras._impl.keras.initializers import Ones
+from tensorflow.python.keras._impl.keras.initializers import Orthogonal
+from tensorflow.python.keras._impl.keras.initializers import RandomNormal
+from tensorflow.python.keras._impl.keras.initializers import RandomUniform
+from tensorflow.python.keras._impl.keras.initializers import TruncatedNormal
+from tensorflow.python.keras._impl.keras.initializers import VarianceScaling
+from tensorflow.python.keras._impl.keras.initializers import Zeros
 
 # Functional interface.
 # pylint: disable=g-bad-import-order
-from tensorflow.contrib.keras.python.keras.initializers import glorot_normal
-from tensorflow.contrib.keras.python.keras.initializers import glorot_uniform
-from tensorflow.contrib.keras.python.keras.initializers import he_normal
-from tensorflow.contrib.keras.python.keras.initializers import he_uniform
-from tensorflow.contrib.keras.python.keras.initializers import lecun_normal
-from tensorflow.contrib.keras.python.keras.initializers import lecun_uniform
+from tensorflow.python.keras._impl.keras.initializers import glorot_normal
+from tensorflow.python.keras._impl.keras.initializers import glorot_uniform
+from tensorflow.python.keras._impl.keras.initializers import he_normal
+from tensorflow.python.keras._impl.keras.initializers import he_uniform
+from tensorflow.python.keras._impl.keras.initializers import lecun_normal
+from tensorflow.python.keras._impl.keras.initializers import lecun_uniform
 
 # Auxiliary utils.
-from tensorflow.contrib.keras.python.keras.initializers import deserialize
-from tensorflow.contrib.keras.python.keras.initializers import serialize
-from tensorflow.contrib.keras.python.keras.initializers import get
+from tensorflow.python.keras._impl.keras.initializers import deserialize
+from tensorflow.python.keras._impl.keras.initializers import serialize
+from tensorflow.python.keras._impl.keras.initializers import get
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/layers/__init__.py b/tensorflow/contrib/keras/api/keras/layers/__init__.py
index aafd189217..acf0a5e179 100644
--- a/tensorflow/contrib/keras/api/keras/layers/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/layers/__init__.py
@@ -20,128 +20,128 @@ from __future__ import print_function
 
 # Generic layers.
 # pylint: disable=g-bad-import-order
-from tensorflow.contrib.keras.python.keras.engine import Input
-from tensorflow.contrib.keras.python.keras.engine import InputLayer
-from tensorflow.contrib.keras.python.keras.engine import InputSpec
-from tensorflow.contrib.keras.python.keras.engine import Layer
+from tensorflow.python.keras._impl.keras.engine import Input
+from tensorflow.python.keras._impl.keras.engine import InputLayer
+from tensorflow.python.keras._impl.keras.engine import InputSpec
+from tensorflow.python.keras._impl.keras.engine import Layer
 
 # Advanced activations.
-from tensorflow.contrib.keras.python.keras.layers.advanced_activations import LeakyReLU
-from tensorflow.contrib.keras.python.keras.layers.advanced_activations import PReLU
-from tensorflow.contrib.keras.python.keras.layers.advanced_activations import ELU
-from tensorflow.contrib.keras.python.keras.layers.advanced_activations import ThresholdedReLU
+from tensorflow.python.keras._impl.keras.layers.advanced_activations import LeakyReLU
+from tensorflow.python.keras._impl.keras.layers.advanced_activations import PReLU
+from tensorflow.python.keras._impl.keras.layers.advanced_activations import ELU
+from tensorflow.python.keras._impl.keras.layers.advanced_activations import ThresholdedReLU
 
 # Convolution layers.
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Conv1D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Conv2D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Conv3D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Conv2DTranspose
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Conv3DTranspose
-from tensorflow.contrib.keras.python.keras.layers.convolutional import SeparableConv2D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Conv1D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Conv2D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Conv3D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Conv2DTranspose
+from tensorflow.python.keras._impl.keras.layers.convolutional import Conv3DTranspose
+from tensorflow.python.keras._impl.keras.layers.convolutional import SeparableConv2D
 
 # Convolution layer aliases.
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Convolution1D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Convolution2D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Convolution3D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Convolution2DTranspose
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Convolution3DTranspose
-from tensorflow.contrib.keras.python.keras.layers.convolutional import SeparableConvolution2D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Convolution1D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Convolution2D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Convolution3D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Convolution2DTranspose
+from tensorflow.python.keras._impl.keras.layers.convolutional import Convolution3DTranspose
+from tensorflow.python.keras._impl.keras.layers.convolutional import SeparableConvolution2D
 
 # Image processing layers.
-from tensorflow.contrib.keras.python.keras.layers.convolutional import UpSampling1D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import UpSampling2D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import UpSampling3D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import ZeroPadding1D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import ZeroPadding2D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import ZeroPadding3D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Cropping1D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Cropping2D
-from tensorflow.contrib.keras.python.keras.layers.convolutional import Cropping3D
+from tensorflow.python.keras._impl.keras.layers.convolutional import UpSampling1D
+from tensorflow.python.keras._impl.keras.layers.convolutional import UpSampling2D
+from tensorflow.python.keras._impl.keras.layers.convolutional import UpSampling3D
+from tensorflow.python.keras._impl.keras.layers.convolutional import ZeroPadding1D
+from tensorflow.python.keras._impl.keras.layers.convolutional import ZeroPadding2D
+from tensorflow.python.keras._impl.keras.layers.convolutional import ZeroPadding3D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Cropping1D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Cropping2D
+from tensorflow.python.keras._impl.keras.layers.convolutional import Cropping3D
 
 # Convolutional-recurrent layers.
-from tensorflow.contrib.keras.python.keras.layers.convolutional_recurrent import ConvLSTM2D
+from tensorflow.python.keras._impl.keras.layers.convolutional_recurrent import ConvLSTM2D
 
 # Core layers.
-from tensorflow.contrib.keras.python.keras.layers.core import Masking
-from tensorflow.contrib.keras.python.keras.layers.core import Dropout
-from tensorflow.contrib.keras.python.keras.layers.core import SpatialDropout1D
-from tensorflow.contrib.keras.python.keras.layers.core import SpatialDropout2D
-from tensorflow.contrib.keras.python.keras.layers.core import SpatialDropout3D
-from tensorflow.contrib.keras.python.keras.layers.core import Activation
-from tensorflow.contrib.keras.python.keras.layers.core import Reshape
-from tensorflow.contrib.keras.python.keras.layers.core import Permute
-from tensorflow.contrib.keras.python.keras.layers.core import Flatten
-from tensorflow.contrib.keras.python.keras.layers.core import RepeatVector
-from tensorflow.contrib.keras.python.keras.layers.core import Lambda
-from tensorflow.contrib.keras.python.keras.layers.core import Dense
-from tensorflow.contrib.keras.python.keras.layers.core import ActivityRegularization
+from tensorflow.python.keras._impl.keras.layers.core import Masking
+from tensorflow.python.keras._impl.keras.layers.core import Dropout
+from tensorflow.python.keras._impl.keras.layers.core import SpatialDropout1D
+from tensorflow.python.keras._impl.keras.layers.core import SpatialDropout2D
+from tensorflow.python.keras._impl.keras.layers.core import SpatialDropout3D
+from tensorflow.python.keras._impl.keras.layers.core import Activation
+from tensorflow.python.keras._impl.keras.layers.core import Reshape
+from tensorflow.python.keras._impl.keras.layers.core import Permute
+from tensorflow.python.keras._impl.keras.layers.core import Flatten
+from tensorflow.python.keras._impl.keras.layers.core import RepeatVector
+from tensorflow.python.keras._impl.keras.layers.core import Lambda
+from tensorflow.python.keras._impl.keras.layers.core import Dense
+from tensorflow.python.keras._impl.keras.layers.core import ActivityRegularization
 
 # Embedding layers.
-from tensorflow.contrib.keras.python.keras.layers.embeddings import Embedding
+from tensorflow.python.keras._impl.keras.layers.embeddings import Embedding
 
 # Locally-connected layers.
-from tensorflow.contrib.keras.python.keras.layers.local import LocallyConnected1D
-from tensorflow.contrib.keras.python.keras.layers.local import LocallyConnected2D
+from tensorflow.python.keras._impl.keras.layers.local import LocallyConnected1D
+from tensorflow.python.keras._impl.keras.layers.local import LocallyConnected2D
 
 # Merge layers.
-from tensorflow.contrib.keras.python.keras.layers.merge import Add
-from tensorflow.contrib.keras.python.keras.layers.merge import Multiply
-from tensorflow.contrib.keras.python.keras.layers.merge import Average
-from tensorflow.contrib.keras.python.keras.layers.merge import Maximum
-from tensorflow.contrib.keras.python.keras.layers.merge import Concatenate
-from tensorflow.contrib.keras.python.keras.layers.merge import Dot
-from tensorflow.contrib.keras.python.keras.layers.merge import add
-from tensorflow.contrib.keras.python.keras.layers.merge import multiply
-from tensorflow.contrib.keras.python.keras.layers.merge import average
-from tensorflow.contrib.keras.python.keras.layers.merge import maximum
-from tensorflow.contrib.keras.python.keras.layers.merge import concatenate
-from tensorflow.contrib.keras.python.keras.layers.merge import dot
+from tensorflow.python.keras._impl.keras.layers.merge import Add
+from tensorflow.python.keras._impl.keras.layers.merge import Multiply
+from tensorflow.python.keras._impl.keras.layers.merge import Average
+from tensorflow.python.keras._impl.keras.layers.merge import Maximum
+from tensorflow.python.keras._impl.keras.layers.merge import Concatenate
+from tensorflow.python.keras._impl.keras.layers.merge import Dot
+from tensorflow.python.keras._impl.keras.layers.merge import add
+from tensorflow.python.keras._impl.keras.layers.merge import multiply
+from tensorflow.python.keras._impl.keras.layers.merge import average
+from tensorflow.python.keras._impl.keras.layers.merge import maximum
+from tensorflow.python.keras._impl.keras.layers.merge import concatenate
+from tensorflow.python.keras._impl.keras.layers.merge import dot
 
 # Noise layers.
-from tensorflow.contrib.keras.python.keras.layers.noise import AlphaDropout
-from tensorflow.contrib.keras.python.keras.layers.noise import GaussianNoise
-from tensorflow.contrib.keras.python.keras.layers.noise import GaussianDropout
+from tensorflow.python.keras._impl.keras.layers.noise import AlphaDropout
+from tensorflow.python.keras._impl.keras.layers.noise import GaussianNoise
+from tensorflow.python.keras._impl.keras.layers.noise import GaussianDropout
 
 # Normalization layers.
-from tensorflow.contrib.keras.python.keras.layers.normalization import BatchNormalization
+from tensorflow.python.keras._impl.keras.layers.normalization import BatchNormalization
 
 # Pooling layers.
-from tensorflow.contrib.keras.python.keras.layers.pooling import MaxPooling1D
-from tensorflow.contrib.keras.python.keras.layers.pooling import MaxPooling2D
-from tensorflow.contrib.keras.python.keras.layers.pooling import MaxPooling3D
-from tensorflow.contrib.keras.python.keras.layers.pooling import AveragePooling1D
-from tensorflow.contrib.keras.python.keras.layers.pooling import AveragePooling2D
-from tensorflow.contrib.keras.python.keras.layers.pooling import AveragePooling3D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalAveragePooling1D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalAveragePooling2D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalAveragePooling3D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalMaxPooling1D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalMaxPooling2D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalMaxPooling3D
+from tensorflow.python.keras._impl.keras.layers.pooling import MaxPooling1D
+from tensorflow.python.keras._impl.keras.layers.pooling import MaxPooling2D
+from tensorflow.python.keras._impl.keras.layers.pooling import MaxPooling3D
+from tensorflow.python.keras._impl.keras.layers.pooling import AveragePooling1D
+from tensorflow.python.keras._impl.keras.layers.pooling import AveragePooling2D
+from tensorflow.python.keras._impl.keras.layers.pooling import AveragePooling3D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalAveragePooling1D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalAveragePooling2D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalAveragePooling3D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPooling1D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPooling2D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPooling3D
 
 # Pooling layer aliases.
-from tensorflow.contrib.keras.python.keras.layers.pooling import MaxPool1D
-from tensorflow.contrib.keras.python.keras.layers.pooling import MaxPool2D
-from tensorflow.contrib.keras.python.keras.layers.pooling import MaxPool3D
-from tensorflow.contrib.keras.python.keras.layers.pooling import AvgPool1D
-from tensorflow.contrib.keras.python.keras.layers.pooling import AvgPool2D
-from tensorflow.contrib.keras.python.keras.layers.pooling import AvgPool3D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalAvgPool1D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalAvgPool2D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalAvgPool3D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalMaxPool1D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalMaxPool2D
-from tensorflow.contrib.keras.python.keras.layers.pooling import GlobalMaxPool3D
+from tensorflow.python.keras._impl.keras.layers.pooling import MaxPool1D
+from tensorflow.python.keras._impl.keras.layers.pooling import MaxPool2D
+from tensorflow.python.keras._impl.keras.layers.pooling import MaxPool3D
+from tensorflow.python.keras._impl.keras.layers.pooling import AvgPool1D
+from tensorflow.python.keras._impl.keras.layers.pooling import AvgPool2D
+from tensorflow.python.keras._impl.keras.layers.pooling import AvgPool3D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalAvgPool1D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalAvgPool2D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalAvgPool3D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPool1D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPool2D
+from tensorflow.python.keras._impl.keras.layers.pooling import GlobalMaxPool3D
 
 # Recurrent layers.
-from tensorflow.contrib.keras.python.keras.layers.recurrent import SimpleRNN
-from tensorflow.contrib.keras.python.keras.layers.recurrent import GRU
-from tensorflow.contrib.keras.python.keras.layers.recurrent import LSTM
+from tensorflow.python.keras._impl.keras.layers.recurrent import SimpleRNN
+from tensorflow.python.keras._impl.keras.layers.recurrent import GRU
+from tensorflow.python.keras._impl.keras.layers.recurrent import LSTM
 
 # Wrapper functions
-from tensorflow.contrib.keras.python.keras.layers.wrappers import Wrapper 
-from tensorflow.contrib.keras.python.keras.layers.wrappers import Bidirectional 
-from tensorflow.contrib.keras.python.keras.layers.wrappers import TimeDistributed
+from tensorflow.python.keras._impl.keras.layers.wrappers import Wrapper
+from tensorflow.python.keras._impl.keras.layers.wrappers import Bidirectional
+from tensorflow.python.keras._impl.keras.layers.wrappers import TimeDistributed
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/losses/__init__.py b/tensorflow/contrib/keras/api/keras/losses/__init__.py
index 06dd679f9c..66721b694f 100644
--- a/tensorflow/contrib/keras/api/keras/losses/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/losses/__init__.py
@@ -19,26 +19,26 @@ from __future__ import division
 from __future__ import print_function
 
 # Loss functions.
-from tensorflow.contrib.keras.python.keras.losses import binary_crossentropy
-from tensorflow.contrib.keras.python.keras.losses import categorical_crossentropy
-from tensorflow.contrib.keras.python.keras.losses import categorical_hinge
-from tensorflow.contrib.keras.python.keras.losses import cosine_proximity
-from tensorflow.contrib.keras.python.keras.losses import hinge
-from tensorflow.contrib.keras.python.keras.losses import kullback_leibler_divergence
-from tensorflow.contrib.keras.python.keras.losses import logcosh
-from tensorflow.contrib.keras.python.keras.losses import mean_absolute_error
-from tensorflow.contrib.keras.python.keras.losses import mean_absolute_percentage_error
-from tensorflow.contrib.keras.python.keras.losses import mean_squared_error
-from tensorflow.contrib.keras.python.keras.losses import mean_squared_logarithmic_error
-from tensorflow.contrib.keras.python.keras.losses import poisson
-from tensorflow.contrib.keras.python.keras.losses import sparse_categorical_crossentropy
-from tensorflow.contrib.keras.python.keras.losses import squared_hinge
+from tensorflow.python.keras._impl.keras.losses import binary_crossentropy
+from tensorflow.python.keras._impl.keras.losses import categorical_crossentropy
+from tensorflow.python.keras._impl.keras.losses import categorical_hinge
+from tensorflow.python.keras._impl.keras.losses import cosine_proximity
+from tensorflow.python.keras._impl.keras.losses import hinge
+from tensorflow.python.keras._impl.keras.losses import kullback_leibler_divergence
+from tensorflow.python.keras._impl.keras.losses import logcosh
+from tensorflow.python.keras._impl.keras.losses import mean_absolute_error
+from tensorflow.python.keras._impl.keras.losses import mean_absolute_percentage_error
+from tensorflow.python.keras._impl.keras.losses import mean_squared_error
+from tensorflow.python.keras._impl.keras.losses import mean_squared_logarithmic_error
+from tensorflow.python.keras._impl.keras.losses import poisson
+from tensorflow.python.keras._impl.keras.losses import sparse_categorical_crossentropy
+from tensorflow.python.keras._impl.keras.losses import squared_hinge
 
 # Auxiliary utils.
 # pylint: disable=g-bad-import-order
-from tensorflow.contrib.keras.python.keras.losses import deserialize
-from tensorflow.contrib.keras.python.keras.losses import serialize
-from tensorflow.contrib.keras.python.keras.losses import get
+from tensorflow.python.keras._impl.keras.losses import deserialize
+from tensorflow.python.keras._impl.keras.losses import serialize
+from tensorflow.python.keras._impl.keras.losses import get
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/metrics/__init__.py b/tensorflow/contrib/keras/api/keras/metrics/__init__.py
index 99496edde2..59faf037bc 100644
--- a/tensorflow/contrib/keras/api/keras/metrics/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/metrics/__init__.py
@@ -19,28 +19,28 @@ from __future__ import division
 from __future__ import print_function
 
 # Metrics functions.
-from tensorflow.contrib.keras.python.keras.metrics import binary_accuracy
-from tensorflow.contrib.keras.python.keras.metrics import binary_crossentropy
-from tensorflow.contrib.keras.python.keras.metrics import categorical_accuracy
-from tensorflow.contrib.keras.python.keras.metrics import categorical_crossentropy
-from tensorflow.contrib.keras.python.keras.metrics import cosine_proximity
-from tensorflow.contrib.keras.python.keras.metrics import hinge
-from tensorflow.contrib.keras.python.keras.metrics import kullback_leibler_divergence
-from tensorflow.contrib.keras.python.keras.metrics import mean_absolute_error
-from tensorflow.contrib.keras.python.keras.metrics import mean_absolute_percentage_error
-from tensorflow.contrib.keras.python.keras.metrics import mean_squared_error
-from tensorflow.contrib.keras.python.keras.metrics import mean_squared_logarithmic_error
-from tensorflow.contrib.keras.python.keras.metrics import poisson
-from tensorflow.contrib.keras.python.keras.metrics import sparse_categorical_crossentropy
-from tensorflow.contrib.keras.python.keras.metrics import sparse_top_k_categorical_accuracy
-from tensorflow.contrib.keras.python.keras.metrics import squared_hinge
-from tensorflow.contrib.keras.python.keras.metrics import top_k_categorical_accuracy
+from tensorflow.python.keras._impl.keras.metrics import binary_accuracy
+from tensorflow.python.keras._impl.keras.metrics import binary_crossentropy
+from tensorflow.python.keras._impl.keras.metrics import categorical_accuracy
+from tensorflow.python.keras._impl.keras.metrics import categorical_crossentropy
+from tensorflow.python.keras._impl.keras.metrics import cosine_proximity
+from tensorflow.python.keras._impl.keras.metrics import hinge
+from tensorflow.python.keras._impl.keras.metrics import kullback_leibler_divergence
+from tensorflow.python.keras._impl.keras.metrics import mean_absolute_error
+from tensorflow.python.keras._impl.keras.metrics import mean_absolute_percentage_error
+from tensorflow.python.keras._impl.keras.metrics import mean_squared_error
+from tensorflow.python.keras._impl.keras.metrics import mean_squared_logarithmic_error
+from tensorflow.python.keras._impl.keras.metrics import poisson
+from tensorflow.python.keras._impl.keras.metrics import sparse_categorical_crossentropy
+from tensorflow.python.keras._impl.keras.metrics import sparse_top_k_categorical_accuracy
+from tensorflow.python.keras._impl.keras.metrics import squared_hinge
+from tensorflow.python.keras._impl.keras.metrics import top_k_categorical_accuracy
 
 # Auxiliary utils.
 # pylint: disable=g-bad-import-order
-from tensorflow.contrib.keras.python.keras.metrics import deserialize
-from tensorflow.contrib.keras.python.keras.metrics import serialize
-from tensorflow.contrib.keras.python.keras.metrics import get
+from tensorflow.python.keras._impl.keras.metrics import deserialize
+from tensorflow.python.keras._impl.keras.metrics import serialize
+from tensorflow.python.keras._impl.keras.metrics import get
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/models/__init__.py b/tensorflow/contrib/keras/api/keras/models/__init__.py
index 4e5b2a1ed0..2fb4ac0960 100644
--- a/tensorflow/contrib/keras/api/keras/models/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/models/__init__.py
@@ -18,13 +18,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.models import load_model
-from tensorflow.contrib.keras.python.keras.models import Model
-from tensorflow.contrib.keras.python.keras.models import model_from_config
-from tensorflow.contrib.keras.python.keras.models import model_from_json
-from tensorflow.contrib.keras.python.keras.models import model_from_yaml
-from tensorflow.contrib.keras.python.keras.models import save_model
-from tensorflow.contrib.keras.python.keras.models import Sequential
+from tensorflow.python.keras._impl.keras.models import load_model
+from tensorflow.python.keras._impl.keras.models import Model
+from tensorflow.python.keras._impl.keras.models import model_from_config
+from tensorflow.python.keras._impl.keras.models import model_from_json
+from tensorflow.python.keras._impl.keras.models import model_from_yaml
+from tensorflow.python.keras._impl.keras.models import save_model
+from tensorflow.python.keras._impl.keras.models import Sequential
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/optimizers/__init__.py b/tensorflow/contrib/keras/api/keras/optimizers/__init__.py
index b3531d7933..44f47bc47f 100644
--- a/tensorflow/contrib/keras/api/keras/optimizers/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/optimizers/__init__.py
@@ -19,20 +19,20 @@ from __future__ import division
 from __future__ import print_function
 
 # Optimizer classes.
-from tensorflow.contrib.keras.python.keras.optimizers import Adadelta
-from tensorflow.contrib.keras.python.keras.optimizers import Adagrad
-from tensorflow.contrib.keras.python.keras.optimizers import Adam
-from tensorflow.contrib.keras.python.keras.optimizers import Adamax
-from tensorflow.contrib.keras.python.keras.optimizers import Nadam
-from tensorflow.contrib.keras.python.keras.optimizers import Optimizer
-from tensorflow.contrib.keras.python.keras.optimizers import RMSprop
-from tensorflow.contrib.keras.python.keras.optimizers import SGD
+from tensorflow.python.keras._impl.keras.optimizers import Adadelta
+from tensorflow.python.keras._impl.keras.optimizers import Adagrad
+from tensorflow.python.keras._impl.keras.optimizers import Adam
+from tensorflow.python.keras._impl.keras.optimizers import Adamax
+from tensorflow.python.keras._impl.keras.optimizers import Nadam
+from tensorflow.python.keras._impl.keras.optimizers import Optimizer
+from tensorflow.python.keras._impl.keras.optimizers import RMSprop
+from tensorflow.python.keras._impl.keras.optimizers import SGD
 
 # Auxiliary utils.
 # pylint: disable=g-bad-import-order
-from tensorflow.contrib.keras.python.keras.optimizers import deserialize
-from tensorflow.contrib.keras.python.keras.optimizers import serialize
-from tensorflow.contrib.keras.python.keras.optimizers import get
+from tensorflow.python.keras._impl.keras.optimizers import deserialize
+from tensorflow.python.keras._impl.keras.optimizers import serialize
+from tensorflow.python.keras._impl.keras.optimizers import get
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/preprocessing/image/__init__.py b/tensorflow/contrib/keras/api/keras/preprocessing/image/__init__.py
index 18ce1becc2..b96e767552 100644
--- a/tensorflow/contrib/keras/api/keras/preprocessing/image/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/preprocessing/image/__init__.py
@@ -18,20 +18,20 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.preprocessing.image import apply_transform
-from tensorflow.contrib.keras.python.keras.preprocessing.image import array_to_img
-from tensorflow.contrib.keras.python.keras.preprocessing.image import DirectoryIterator
-from tensorflow.contrib.keras.python.keras.preprocessing.image import flip_axis
-from tensorflow.contrib.keras.python.keras.preprocessing.image import ImageDataGenerator
-from tensorflow.contrib.keras.python.keras.preprocessing.image import img_to_array
-from tensorflow.contrib.keras.python.keras.preprocessing.image import Iterator
-from tensorflow.contrib.keras.python.keras.preprocessing.image import load_img
-from tensorflow.contrib.keras.python.keras.preprocessing.image import NumpyArrayIterator
-from tensorflow.contrib.keras.python.keras.preprocessing.image import random_channel_shift
-from tensorflow.contrib.keras.python.keras.preprocessing.image import random_rotation
-from tensorflow.contrib.keras.python.keras.preprocessing.image import random_shear
-from tensorflow.contrib.keras.python.keras.preprocessing.image import random_shift
-from tensorflow.contrib.keras.python.keras.preprocessing.image import random_zoom
+from tensorflow.python.keras._impl.keras.preprocessing.image import apply_transform
+from tensorflow.python.keras._impl.keras.preprocessing.image import array_to_img
+from tensorflow.python.keras._impl.keras.preprocessing.image import DirectoryIterator
+from tensorflow.python.keras._impl.keras.preprocessing.image import flip_axis
+from tensorflow.python.keras._impl.keras.preprocessing.image import ImageDataGenerator
+from tensorflow.python.keras._impl.keras.preprocessing.image import img_to_array
+from tensorflow.python.keras._impl.keras.preprocessing.image import Iterator
+from tensorflow.python.keras._impl.keras.preprocessing.image import load_img
+from tensorflow.python.keras._impl.keras.preprocessing.image import NumpyArrayIterator
+from tensorflow.python.keras._impl.keras.preprocessing.image import random_channel_shift
+from tensorflow.python.keras._impl.keras.preprocessing.image import random_rotation
+from tensorflow.python.keras._impl.keras.preprocessing.image import random_shear
+from tensorflow.python.keras._impl.keras.preprocessing.image import random_shift
+from tensorflow.python.keras._impl.keras.preprocessing.image import random_zoom
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/preprocessing/sequence/__init__.py b/tensorflow/contrib/keras/api/keras/preprocessing/sequence/__init__.py
index 2621e9bf53..112f6af5e5 100644
--- a/tensorflow/contrib/keras/api/keras/preprocessing/sequence/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/preprocessing/sequence/__init__.py
@@ -18,9 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.preprocessing.sequence import make_sampling_table
-from tensorflow.contrib.keras.python.keras.preprocessing.sequence import pad_sequences
-from tensorflow.contrib.keras.python.keras.preprocessing.sequence import skipgrams
+from tensorflow.python.keras._impl.keras.preprocessing.sequence import make_sampling_table
+from tensorflow.python.keras._impl.keras.preprocessing.sequence import pad_sequences
+from tensorflow.python.keras._impl.keras.preprocessing.sequence import skipgrams
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/preprocessing/text/__init__.py b/tensorflow/contrib/keras/api/keras/preprocessing/text/__init__.py
index a6b68c3ba6..5bf1a2fb21 100644
--- a/tensorflow/contrib/keras/api/keras/preprocessing/text/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/preprocessing/text/__init__.py
@@ -18,9 +18,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.preprocessing.text import one_hot
-from tensorflow.contrib.keras.python.keras.preprocessing.text import text_to_word_sequence
-from tensorflow.contrib.keras.python.keras.preprocessing.text import Tokenizer
+from tensorflow.python.keras._impl.keras.preprocessing.text import one_hot
+from tensorflow.python.keras._impl.keras.preprocessing.text import text_to_word_sequence
+from tensorflow.python.keras._impl.keras.preprocessing.text import Tokenizer
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/regularizers/__init__.py b/tensorflow/contrib/keras/api/keras/regularizers/__init__.py
index a3b0062d5c..3e707ccab5 100644
--- a/tensorflow/contrib/keras/api/keras/regularizers/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/regularizers/__init__.py
@@ -19,19 +19,19 @@ from __future__ import division
 from __future__ import print_function
 
 # Regularizer functions / callable classes.
-from tensorflow.contrib.keras.python.keras.regularizers import L1L2
-from tensorflow.contrib.keras.python.keras.regularizers import Regularizer
+from tensorflow.python.keras._impl.keras.regularizers import L1L2
+from tensorflow.python.keras._impl.keras.regularizers import Regularizer
 
 # Functional interface.
 # pylint: disable=g-bad-import-order
-from tensorflow.contrib.keras.python.keras.regularizers import l1
-from tensorflow.contrib.keras.python.keras.regularizers import l2
-from tensorflow.contrib.keras.python.keras.regularizers import l1_l2
+from tensorflow.python.keras._impl.keras.regularizers import l1
+from tensorflow.python.keras._impl.keras.regularizers import l2
+from tensorflow.python.keras._impl.keras.regularizers import l1_l2
 
 # Auxiliary utils.
-from tensorflow.contrib.keras.python.keras.regularizers import deserialize
-from tensorflow.contrib.keras.python.keras.regularizers import serialize
-from tensorflow.contrib.keras.python.keras.regularizers import get
+from tensorflow.python.keras._impl.keras.regularizers import deserialize
+from tensorflow.python.keras._impl.keras.regularizers import serialize
+from tensorflow.python.keras._impl.keras.regularizers import get
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/utils/__init__.py b/tensorflow/contrib/keras/api/keras/utils/__init__.py
index d6d70f79d5..a7c2179fe7 100644
--- a/tensorflow/contrib/keras/api/keras/utils/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/utils/__init__.py
@@ -18,21 +18,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.utils.data_utils import GeneratorEnqueuer
-from tensorflow.contrib.keras.python.keras.utils.data_utils import get_file
-from tensorflow.contrib.keras.python.keras.utils.data_utils import Sequence
-from tensorflow.contrib.keras.python.keras.utils.data_utils import SequenceEnqueuer
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import custom_object_scope
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import CustomObjectScope
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import deserialize_keras_object
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import get_custom_objects
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import Progbar
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import serialize_keras_object
-from tensorflow.contrib.keras.python.keras.utils.io_utils import HDF5Matrix
-from tensorflow.contrib.keras.python.keras.utils.layer_utils import convert_all_kernels_in_model
-from tensorflow.contrib.keras.python.keras.utils.np_utils import normalize
-from tensorflow.contrib.keras.python.keras.utils.np_utils import to_categorical
-from tensorflow.contrib.keras.python.keras.utils.vis_utils import plot_model
+from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer
+from tensorflow.python.keras._impl.keras.utils.data_utils import get_file
+from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence
+from tensorflow.python.keras._impl.keras.utils.data_utils import SequenceEnqueuer
+from tensorflow.python.keras._impl.keras.utils.generic_utils import custom_object_scope
+from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope
+from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object
+from tensorflow.python.keras._impl.keras.utils.generic_utils import get_custom_objects
+from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar
+from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object
+from tensorflow.python.keras._impl.keras.utils.io_utils import HDF5Matrix
+from tensorflow.python.keras._impl.keras.utils.layer_utils import convert_all_kernels_in_model
+from tensorflow.python.keras._impl.keras.utils.np_utils import normalize
+from tensorflow.python.keras._impl.keras.utils.np_utils import to_categorical
+from tensorflow.python.keras._impl.keras.utils.vis_utils import plot_model
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/api/keras/wrappers/scikit_learn/__init__.py b/tensorflow/contrib/keras/api/keras/wrappers/scikit_learn/__init__.py
index ba1d28c5c6..a46f859273 100644
--- a/tensorflow/contrib/keras/api/keras/wrappers/scikit_learn/__init__.py
+++ b/tensorflow/contrib/keras/api/keras/wrappers/scikit_learn/__init__.py
@@ -18,8 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.contrib.keras.python.keras.wrappers.scikit_learn import KerasClassifier
-from tensorflow.contrib.keras.python.keras.wrappers.scikit_learn import KerasRegressor
+from tensorflow.python.keras._impl.keras.wrappers.scikit_learn import KerasClassifier
+from tensorflow.python.keras._impl.keras.wrappers.scikit_learn import KerasRegressor
 
 del absolute_import
 del division
diff --git a/tensorflow/contrib/keras/python/keras/__init__.py b/tensorflow/contrib/keras/python/keras/__init__.py
deleted file mode 100644
index a3edb29170..0000000000
--- a/tensorflow/contrib/keras/python/keras/__init__.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""The Keras API.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.keras.python.keras import activations
-from tensorflow.contrib.keras.python.keras import applications
-from tensorflow.contrib.keras.python.keras import backend
-from tensorflow.contrib.keras.python.keras import callbacks
-from tensorflow.contrib.keras.python.keras import constraints
-from tensorflow.contrib.keras.python.keras import datasets
-from tensorflow.contrib.keras.python.keras import engine
-from tensorflow.contrib.keras.python.keras import initializers
-from tensorflow.contrib.keras.python.keras import layers
-from tensorflow.contrib.keras.python.keras import losses
-from tensorflow.contrib.keras.python.keras import metrics
-from tensorflow.contrib.keras.python.keras import models
-from tensorflow.contrib.keras.python.keras import optimizers
-from tensorflow.contrib.keras.python.keras import preprocessing
-from tensorflow.contrib.keras.python.keras import regularizers
-from tensorflow.contrib.keras.python.keras import utils
-from tensorflow.contrib.keras.python.keras import wrappers
-from tensorflow.contrib.keras.python.keras.layers import Input
-
-__version__ = '2.0.8-tf'
diff --git a/tensorflow/contrib/keras/python/keras/layers/__init__.py b/tensorflow/contrib/keras/python/keras/layers/__init__.py
deleted file mode 100644
index 9a428f3114..0000000000
--- a/tensorflow/contrib/keras/python/keras/layers/__init__.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras layers module.
-"""
-# pylint: disable=wildcard-import
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.keras.python.keras.engine import Input
-from tensorflow.contrib.keras.python.keras.engine import InputLayer
-from tensorflow.contrib.keras.python.keras.engine import InputSpec
-from tensorflow.contrib.keras.python.keras.engine import Layer
-from tensorflow.contrib.keras.python.keras.layers.advanced_activations import *
-from tensorflow.contrib.keras.python.keras.layers.convolutional import *
-from tensorflow.contrib.keras.python.keras.layers.convolutional_recurrent import *
-from tensorflow.contrib.keras.python.keras.layers.core import *
-from tensorflow.contrib.keras.python.keras.layers.embeddings import *
-from tensorflow.contrib.keras.python.keras.layers.local import *
-from tensorflow.contrib.keras.python.keras.layers.merge import *
-from tensorflow.contrib.keras.python.keras.layers.noise import *
-from tensorflow.contrib.keras.python.keras.layers.normalization import *
-from tensorflow.contrib.keras.python.keras.layers.pooling import *
-from tensorflow.contrib.keras.python.keras.layers.recurrent import *
-from tensorflow.contrib.keras.python.keras.layers.serialization import deserialize
-from tensorflow.contrib.keras.python.keras.layers.serialization import serialize
-from tensorflow.contrib.keras.python.keras.layers.wrappers import *
-
diff --git a/tensorflow/contrib/keras/python/keras/utils/__init__.py b/tensorflow/contrib/keras/python/keras/utils/__init__.py
deleted file mode 100644
index 3b197653f3..0000000000
--- a/tensorflow/contrib/keras/python/keras/utils/__init__.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras utilities.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.keras.python.keras.utils import conv_utils
-from tensorflow.contrib.keras.python.keras.utils import data_utils
-from tensorflow.contrib.keras.python.keras.utils import generic_utils
-from tensorflow.contrib.keras.python.keras.utils import io_utils
-from tensorflow.contrib.keras.python.keras.utils import np_utils
-from tensorflow.contrib.keras.python.keras.utils.data_utils import GeneratorEnqueuer
-from tensorflow.contrib.keras.python.keras.utils.data_utils import get_file
-from tensorflow.contrib.keras.python.keras.utils.data_utils import OrderedEnqueuer
-from tensorflow.contrib.keras.python.keras.utils.data_utils import Sequence
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import custom_object_scope
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import CustomObjectScope
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import deserialize_keras_object
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import get_custom_objects
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import Progbar
-from tensorflow.contrib.keras.python.keras.utils.generic_utils import serialize_keras_object
-from tensorflow.contrib.keras.python.keras.utils.io_utils import HDF5Matrix
-from tensorflow.contrib.keras.python.keras.utils.layer_utils import convert_all_kernels_in_model
-from tensorflow.contrib.keras.python.keras.utils.np_utils import normalize
-from tensorflow.contrib.keras.python.keras.utils.np_utils import to_categorical
-from tensorflow.contrib.keras.python.keras.utils.vis_utils import plot_model
-
-
-# Globally-importable utils.
diff --git a/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py b/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py
deleted file mode 100644
index f6820ee039..0000000000
--- a/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for io_utils."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import shutil
-
-import numpy as np
-
-from tensorflow.contrib.keras.python import keras
-from tensorflow.python.platform import test
-
-try:
-  import h5py  # pylint:disable=g-import-not-at-top
-except ImportError:
-  h5py = None
-
-
-def create_dataset(h5_path='test.h5'):
-  x = np.random.randn(200, 10).astype('float32')
-  y = np.random.randint(0, 2, size=(200, 1))
-  f = h5py.File(h5_path, 'w')
-  # Creating dataset to store features
-  x_dset = f.create_dataset('my_data', (200, 10), dtype='f')
-  x_dset[:] = x
-  # Creating dataset to store labels
-  y_dset = f.create_dataset('my_labels', (200, 1), dtype='i')
-  y_dset[:] = y
-  f.close()
-
-
-class TestIOUtils(test.TestCase):
-
-  def test_HDF5Matrix(self):
-    if h5py is None:
-      return
-
-    temp_dir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, temp_dir)
-
-    h5_path = os.path.join(temp_dir, 'test.h5')
-    create_dataset(h5_path)
-
-    with self.test_session():
-      # Instantiating HDF5Matrix for the training set,
-      # which is a slice of the first 150 elements
-      x_train = keras.utils.io_utils.HDF5Matrix(
-          h5_path, 'my_data', start=0, end=150)
-      y_train = keras.utils.io_utils.HDF5Matrix(
-          h5_path, 'my_labels', start=0, end=150)
-
-      # Likewise for the test set
-      x_test = keras.utils.io_utils.HDF5Matrix(
-          h5_path, 'my_data', start=150, end=200)
-      y_test = keras.utils.io_utils.HDF5Matrix(
-          h5_path, 'my_labels', start=150, end=200)
-
-      # HDF5Matrix behave more or less like Numpy matrices
-      # with regard to indexing
-      self.assertEqual(y_train.shape, (150, 1))
-      # But they don't support negative indices, so don't try print(x_train[-1])
-
-      self.assertEqual(y_train.dtype, np.dtype('i'))
-      self.assertEqual(y_train.ndim, 2)
-      self.assertEqual(y_train.size, 150)
-
-      model = keras.models.Sequential()
-      model.add(keras.layers.Dense(64, input_shape=(10,), activation='relu'))
-      model.add(keras.layers.Dense(1, activation='sigmoid'))
-      model.compile(loss='binary_crossentropy', optimizer='sgd')
-
-      # Note: you have to use shuffle='batch' or False with HDF5Matrix
-      model.fit(x_train, y_train, batch_size=32, shuffle='batch', verbose=False)
-      # test that evalutation and prediction
-      # don't crash and return reasonable results
-      out_pred = model.predict(x_test, batch_size=32, verbose=False)
-      out_eval = model.evaluate(x_test, y_test, batch_size=32, verbose=False)
-
-      self.assertEqual(out_pred.shape, (50, 1))
-      self.assertEqual(out_eval.shape, ())
-      self.assertGreater(out_eval, 0)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD
index db3be9a991..d35b5556fc 100644
--- a/tensorflow/contrib/learn/BUILD
+++ b/tensorflow/contrib/learn/BUILD
@@ -411,33 +411,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "dnn_linear_combined_benchmark_test",
-    size = "medium",
-    srcs = ["python/learn/estimators/dnn_linear_combined_benchmark_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "guitar",
-        "local",
-        "manual",
-        "notap",
-    ],
-    visibility = [
-        "//learning/brain/google/guitar:__subpackages__",
-        "//tensorflow:__subpackages__",
-    ],
-    deps = [
-        ":learn",
-        "//tensorflow/contrib/layers:layers_py",
-        "//tensorflow/contrib/learn/python/learn/datasets",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:training",
-    ],
-)
-
 py_test(
     name = "kmeans_test",
     size = "medium",
@@ -459,32 +432,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "dnn_benchmark_test",
-    size = "medium",
-    srcs = ["python/learn/estimators/dnn_benchmark_test.py"],
-    srcs_version = "PY2AND3",
-    tags = [
-        "guitar",
-        "local",
-        "manual",
-        "notap",
-    ],
-    visibility = [
-        "//learning/brain/google/guitar:__subpackages__",
-        "//tensorflow:__subpackages__",
-    ],
-    deps = [
-        ":learn",
-        "//tensorflow/contrib/layers:layers_py",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:training",
-        "//third_party/py/numpy",
-    ],
-)
-
 py_test(
     name = "dynamic_rnn_estimator_test",
     size = "medium",
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_benchmark_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_benchmark_test.py
deleted file mode 100644
index 86b3eee6ad..0000000000
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn_benchmark_test.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Regression test for DNNEstimator."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import functools
-import numpy as np
-from tensorflow.contrib.layers.python.layers import feature_column
-from tensorflow.contrib.learn.python.learn.estimators import dnn
-from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
-from tensorflow.contrib.learn.python.learn.estimators import run_config
-from tensorflow.contrib.learn.python.learn.estimators import test_data
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.platform import test
-from tensorflow.python.training import input as input_lib
-
-
-_METRIC_KEYS = {
-    'accuracy',
-    'auc',
-    'accuracy/threshold_0.500000_mean',
-    'loss',
-    'precision/positive_threshold_0.500000_mean',
-    'recall/positive_threshold_0.500000_mean',
-}
-
-
-class DNNClassifierBenchmark(test.Benchmark):
-
-  def _report_metrics(self, metrics):
-    self.report_benchmark(
-        iters=metrics['global_step'],
-        extras={k: v
-                for k, v in metrics.items() if k in _METRIC_KEYS})
-
-  def _report_predictions(self,
-                          benchmark_name_override,
-                          classifier,
-                          input_fn,
-                          iters,
-                          n_examples,
-                          n_classes,
-                          expected_probabilities=None,
-                          expected_classes=None):
-    probabilities = classifier.predict_proba(
-        input_fn=input_fn, as_iterable=False)
-    if expected_probabilities is not None:
-      np.testing.assert_allclose(
-          expected_probabilities, tuple(probabilities), atol=0.2)
-
-    classes = classifier.predict(input_fn=input_fn, as_iterable=False)
-    if expected_classes is not None:
-      np.testing.assert_array_equal(expected_classes, classes)
-
-    self.report_benchmark(
-        iters=iters,
-        extras={
-            'inference.example%d_class%d_prob' % (i, j): probabilities[i][j]
-            for j in range(n_classes) for i in range(n_examples)
-        }.update({
-            'inference.example%d_class' % i: classes[i]
-            for i in range(n_examples)
-        }),
-        name=benchmark_name_override)
-
-  def benchmarkLogisticMatrixData(self):
-    classifier = dnn.DNNClassifier(
-        feature_columns=(feature_column.real_valued_column(
-            'feature', dimension=4),),
-        hidden_units=(3, 3),
-        config=run_config.RunConfig(tf_random_seed=1))
-    input_fn = test_data.iris_input_logistic_fn
-    steps = 400
-    metrics = classifier.fit(input_fn=input_fn, steps=steps).evaluate(
-        input_fn=input_fn, steps=1)
-    estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
-                                         metrics)
-    estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
-    estimator_test_utils.assert_in_range(0.0, 0.3, 'loss', metrics)
-
-    self._report_metrics(metrics)
-
-  def benchmarkLogisticMatrixDataLabels1D(self):
-
-    def _input_fn():
-      iris = test_data.prepare_iris_data_for_logistic_regression()
-      return {
-          'feature': constant_op.constant(
-              iris.data, dtype=dtypes.float32)
-      }, constant_op.constant(
-          iris.target, shape=(100,), dtype=dtypes.int32)
-
-    classifier = dnn.DNNClassifier(
-        feature_columns=(feature_column.real_valued_column(
-            'feature', dimension=4),),
-        hidden_units=(3, 3),
-        config=run_config.RunConfig(tf_random_seed=1))
-    steps = 1000
-    metrics = classifier.fit(input_fn=_input_fn, steps=steps).evaluate(
-        input_fn=_input_fn, steps=1)
-    estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
-                                         metrics)
-    estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
-
-    self._report_metrics(metrics)
-
-  def benchmarkLogisticNpMatrixData(self):
-    classifier = dnn.DNNClassifier(
-        feature_columns=(feature_column.real_valued_column(
-            '', dimension=4),),
-        hidden_units=(3, 3),
-        config=run_config.RunConfig(tf_random_seed=1))
-    iris = test_data.prepare_iris_data_for_logistic_regression()
-    train_x = iris.data
-    train_y = iris.target
-    steps = 100
-    metrics = classifier.fit(x=train_x, y=train_y, steps=steps).evaluate(
-        x=train_x, y=train_y, steps=1)
-    estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
-                                         metrics)
-    estimator_test_utils.assert_in_range(0.8, 1.0, 'accuracy', metrics)
-
-    self._report_metrics(metrics)
-
-  def benchmarkLogisticTensorData(self):
-
-    def _input_fn(num_epochs=None):
-      features = {
-          'age':
-              input_lib.limit_epochs(
-                  constant_op.constant(((.8,), (0.2,), (.1,))),
-                  num_epochs=num_epochs),
-          'language':
-              sparse_tensor.SparseTensor(
-                  values=input_lib.limit_epochs(
-                      ('en', 'fr', 'zh'), num_epochs=num_epochs),
-                  indices=((0, 0), (0, 1), (2, 0)),
-                  dense_shape=(3, 2))
-      }
-      return features, constant_op.constant(
-          ((1,), (0,), (0,)), dtype=dtypes.int32)
-
-    lang_column = feature_column.sparse_column_with_hash_bucket(
-        'language', hash_bucket_size=20)
-    classifier = dnn.DNNClassifier(
-        feature_columns=(feature_column.embedding_column(
-            lang_column, dimension=1),
-                         feature_column.real_valued_column('age')),
-        hidden_units=(3, 3),
-        config=run_config.RunConfig(tf_random_seed=1))
-    steps = 100
-    metrics = classifier.fit(input_fn=_input_fn, steps=steps).evaluate(
-        input_fn=_input_fn, steps=1)
-    estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
-                                         metrics)
-    estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
-    estimator_test_utils.assert_in_range(0.0, 0.3, 'loss', metrics)
-
-    self._report_metrics(metrics)
-    self._report_predictions(
-        classifier=classifier,
-        input_fn=functools.partial(_input_fn, num_epochs=1),
-        iters=metrics['global_step'],
-        n_examples=3,
-        n_classes=2,
-        expected_classes=(1, 0, 0),
-        benchmark_name_override=(
-            'DNNClassifierBenchmark.benchmarkLogisticTensorData_predictions'))
-
-  def benchmarkLogisticFloatLabel(self):
-
-    def _input_fn(num_epochs=None):
-      features = {
-          'age':
-              input_lib.limit_epochs(
-                  constant_op.constant(((50,), (20,), (10,))),
-                  num_epochs=num_epochs),
-          'language':
-              sparse_tensor.SparseTensor(
-                  values=input_lib.limit_epochs(
-                      ('en', 'fr', 'zh'), num_epochs=num_epochs),
-                  indices=((0, 0), (0, 1), (2, 0)),
-                  dense_shape=(3, 2))
-      }
-      return features, constant_op.constant(
-          ((0.8,), (0.,), (0.2,)), dtype=dtypes.float32)
-
-    lang_column = feature_column.sparse_column_with_hash_bucket(
-        'language', hash_bucket_size=20)
-    n_classes = 2
-    classifier = dnn.DNNClassifier(
-        n_classes=n_classes,
-        feature_columns=(feature_column.embedding_column(
-            lang_column, dimension=1),
-                         feature_column.real_valued_column('age')),
-        hidden_units=(3, 3),
-        config=run_config.RunConfig(tf_random_seed=1))
-    steps = 1000
-    metrics = classifier.fit(input_fn=_input_fn, steps=steps).evaluate(
-        input_fn=_input_fn, steps=1)
-    estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
-                                         metrics)
-
-    # Prediction probabilities mirror the labels column, which proves that the
-    # classifier learns from float input.
-    self._report_metrics(metrics)
-    self._report_predictions(
-        classifier=classifier,
-        input_fn=functools.partial(_input_fn, num_epochs=1),
-        iters=metrics['global_step'],
-        n_examples=3,
-        n_classes=n_classes,
-        expected_probabilities=((0.2, 0.8), (1., 0.), (0.8, 0.2)),
-        expected_classes=(1, 0, 0),
-        benchmark_name_override=(
-            'DNNClassifierBenchmark.benchmarkLogisticFloatLabel_predictions'))
-
-  def benchmarkMultiClassMatrixData(self):
-    """Tests multi-class classification using matrix data as input."""
-    classifier = dnn.DNNClassifier(
-        n_classes=3,
-        feature_columns=(feature_column.real_valued_column(
-            'feature', dimension=4),),
-        hidden_units=(3, 3),
-        config=run_config.RunConfig(tf_random_seed=1))
-
-    input_fn = test_data.iris_input_multiclass_fn
-    steps = 500
-    metrics = classifier.fit(input_fn=input_fn, steps=steps).evaluate(
-        input_fn=input_fn, steps=1)
-    estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
-                                         metrics)
-    estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
-    estimator_test_utils.assert_in_range(0.0, 0.4, 'loss', metrics)
-
-    self._report_metrics(metrics)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_benchmark_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_benchmark_test.py
deleted file mode 100644
index 98b7c7e95c..0000000000
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_benchmark_test.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Regression test for DNNLinearCombinedEstimator."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import json
-import tempfile
-from tensorflow.contrib.layers.python.layers import feature_column
-from tensorflow.contrib.learn.python.learn.datasets import base
-from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined
-from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
-from tensorflow.contrib.learn.python.learn.estimators import run_config
-from tensorflow.contrib.learn.python.learn.estimators import test_data
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import test
-from tensorflow.python.training import adagrad
-from tensorflow.python.training import ftrl
-from tensorflow.python.training import server_lib
-
-
-# Desired training steps, reported in benchmark. Actual steps might be slightly
-# more than this since supervisor training runs for a non-detrministic number of
-# steps.
-_ITERS = 100
-
-_METRIC_KEYS = {
-    'accuracy',
-    'auc',
-    'accuracy/threshold_0.500000_mean',
-    'loss',
-    'precision/positive_threshold_0.500000_mean',
-    'recall/positive_threshold_0.500000_mean',
-}
-
-
-class DNNLinearCombinedClassifierBenchmark(test.Benchmark):
-
-  def _assertSingleClassMetrics(self, metrics):
-    estimator_test_utils.assert_in_range(0.9, 1.0, 'auc', metrics)
-    estimator_test_utils.assert_in_range(0.9, 1.0,
-                                         'accuracy/threshold_0.500000_mean',
-                                         metrics)
-    estimator_test_utils.assert_in_range(
-        0.9, 1.0, 'precision/positive_threshold_0.500000_mean', metrics)
-    estimator_test_utils.assert_in_range(
-        0.9, 1.0, 'recall/positive_threshold_0.500000_mean', metrics)
-    self._assertCommonMetrics(metrics)
-
-  def _assertCommonMetrics(self, metrics):
-    estimator_test_utils.assert_in_range(_ITERS, _ITERS + 5, 'global_step',
-                                         metrics)
-    estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
-    estimator_test_utils.assert_in_range(0.0, 0.2, 'loss', metrics)
-    self.report_benchmark(
-        iters=metrics['global_step'],
-        extras={k: v
-                for k, v in metrics.items() if k in _METRIC_KEYS})
-
-  def benchmarkMatrixData(self):
-    iris = test_data.prepare_iris_data_for_logistic_regression()
-    cont_feature = feature_column.real_valued_column('feature', dimension=4)
-    bucketized_feature = feature_column.bucketized_column(
-        cont_feature, test_data.get_quantile_based_buckets(iris.data, 10))
-
-    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
-        model_dir=tempfile.mkdtemp(),
-        linear_feature_columns=(bucketized_feature,),
-        dnn_feature_columns=(cont_feature,),
-        dnn_hidden_units=(3, 3))
-
-    input_fn = test_data.iris_input_logistic_fn
-    metrics = classifier.fit(input_fn=input_fn, steps=_ITERS).evaluate(
-        input_fn=input_fn, steps=100)
-    self._assertSingleClassMetrics(metrics)
-
-  def benchmarkTensorData(self):
-
-    def _input_fn():
-      iris = test_data.prepare_iris_data_for_logistic_regression()
-      features = {}
-      for i in range(4):
-        # The following shows how to provide the Tensor data for
-        # RealValuedColumns.
-        features.update({
-            str(i):
-                array_ops.reshape(
-                    constant_op.constant(
-                        iris.data[:, i], dtype=dtypes.float32), (-1, 1))
-        })
-      # The following shows how to provide the SparseTensor data for
-      # a SparseColumn.
-      features['dummy_sparse_column'] = sparse_tensor.SparseTensor(
-          values=('en', 'fr', 'zh'),
-          indices=((0, 0), (0, 1), (60, 0)),
-          dense_shape=(len(iris.target), 2))
-      labels = array_ops.reshape(
-          constant_op.constant(
-              iris.target, dtype=dtypes.int32), (-1, 1))
-      return features, labels
-
-    iris = test_data.prepare_iris_data_for_logistic_regression()
-    cont_features = [
-        feature_column.real_valued_column(str(i)) for i in range(4)
-    ]
-    linear_features = [
-        feature_column.bucketized_column(
-            cont_features[i],
-            test_data.get_quantile_based_buckets(iris.data[:, i], 10))
-        for i in range(4)
-    ]
-    linear_features.append(
-        feature_column.sparse_column_with_hash_bucket(
-            'dummy_sparse_column', hash_bucket_size=100))
-
-    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
-        model_dir=tempfile.mkdtemp(),
-        linear_feature_columns=linear_features,
-        dnn_feature_columns=cont_features,
-        dnn_hidden_units=(3, 3))
-
-    metrics = classifier.fit(input_fn=_input_fn, steps=_ITERS).evaluate(
-        input_fn=_input_fn, steps=100)
-    self._assertSingleClassMetrics(metrics)
-
-  def benchmarkCustomOptimizer(self):
-    iris = test_data.prepare_iris_data_for_logistic_regression()
-    cont_feature = feature_column.real_valued_column('feature', dimension=4)
-    bucketized_feature = feature_column.bucketized_column(
-        cont_feature, test_data.get_quantile_based_buckets(iris.data, 10))
-
-    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
-        model_dir=tempfile.mkdtemp(),
-        linear_feature_columns=(bucketized_feature,),
-        linear_optimizer=ftrl.FtrlOptimizer(learning_rate=0.1),
-        dnn_feature_columns=(cont_feature,),
-        dnn_hidden_units=(3, 3),
-        dnn_optimizer=adagrad.AdagradOptimizer(learning_rate=0.1))
-
-    input_fn = test_data.iris_input_logistic_fn
-    metrics = classifier.fit(input_fn=input_fn, steps=_ITERS).evaluate(
-        input_fn=input_fn, steps=100)
-    self._assertSingleClassMetrics(metrics)
-
-  def benchmarkMultiClass(self):
-    iris = base.load_iris()
-    cont_feature = feature_column.real_valued_column('feature', dimension=4)
-    bucketized_feature = feature_column.bucketized_column(
-        cont_feature, test_data.get_quantile_based_buckets(iris.data, 10))
-
-    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
-        n_classes=3,
-        linear_feature_columns=(bucketized_feature,),
-        dnn_feature_columns=(cont_feature,),
-        dnn_hidden_units=(3, 3))
-
-    input_fn = test_data.iris_input_multiclass_fn
-    metrics = classifier.fit(input_fn=input_fn, steps=_ITERS).evaluate(
-        input_fn=input_fn, steps=100)
-    self._assertCommonMetrics(metrics)
-
-  def benchmarkPartitionedVariables(self):
-
-    def _input_fn():
-      features = {
-          'language':
-              sparse_tensor.SparseTensor(
-                  values=('en', 'fr', 'zh'),
-                  indices=((0, 0), (0, 1), (2, 0)),
-                  dense_shape=(3, 2))
-      }
-      labels = constant_op.constant(((1,), (0,), (0,)))
-      return features, labels
-
-    # The given hash_bucket_size results in variables larger than the
-    # default min_slice_size attribute, so the variables are partitioned.
-    sparse_feature = feature_column.sparse_column_with_hash_bucket(
-        'language', hash_bucket_size=2e7)
-    embedding_feature = feature_column.embedding_column(
-        sparse_feature, dimension=1)
-
-    tf_config = {
-        'cluster': {
-            run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1']
-        }
-    }
-    with test.mock.patch.dict('os.environ',
-                              {'TF_CONFIG': json.dumps(tf_config)}):
-      config = run_config.RunConfig()
-      # Because we did not start a distributed cluster, we need to pass an
-      # empty ClusterSpec, otherwise the device_setter will look for
-      # distributed jobs, such as "/job:ps" which are not present.
-      config._cluster_spec = server_lib.ClusterSpec({})
-
-    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
-        linear_feature_columns=(sparse_feature,),
-        dnn_feature_columns=(embedding_feature,),
-        dnn_hidden_units=(3, 3),
-        config=config)
-
-    metrics = classifier.fit(input_fn=_input_fn, steps=_ITERS).evaluate(
-        input_fn=_input_fn, steps=100)
-    self._assertCommonMetrics(metrics)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py
index 225d879678..861db1f89e 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/head.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/head.py
@@ -1070,8 +1070,8 @@ class _MultiClassHead(_SingleHead):
     labels_tensor = _to_labels_tensor(labels, self._label_name)
     _check_no_sparse_tensor(labels_tensor)
     if self._label_keys:
-      table = lookup_ops.index_table_from_tensor(self._label_keys,
-                                                 name="label_id_lookup")
+      table = lookup_ops.index_table_from_tensor(
+          self._label_keys, name="label_id_lookup")
       return {
           "labels": labels_tensor,
           "label_ids": table.lookup(labels_tensor),
diff --git a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py
index 0f09b111bd..896b668d4e 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py
@@ -178,7 +178,7 @@ def select_last_activations(activations, sequence_lengths):
   """Selects the nth set of activations for each n in `sequence_length`.
 
   Reuturns a `Tensor` of shape `[batch_size, k]`. If `sequence_length` is not
-  `None`, then `output[i, :] = activations[i, sequence_length[i], :]`. If
+  `None`, then `output[i, :] = activations[i, sequence_length[i] - 1, :]`. If
   `sequence_length` is `None`, then `output[i, :] = activations[i, -1, :]`.
 
   Args:
diff --git a/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py b/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py
index 9aa1e05628..6253f96315 100644
--- a/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py
+++ b/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py
@@ -163,7 +163,7 @@ class ResamplerTest(test.TestCase):
     data_channels = 3
     warp_width = 2
     warp_height = 6
-    batch_size = 10
+    batch_size = 3
 
     warp = _make_warp(batch_size, warp_height, warp_width, dtype.as_numpy_dtype)
     data_shape = (batch_size, data_height, data_width, data_channels)
diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD
index 596c4f351c..ebb7a21856 100644
--- a/tensorflow/contrib/session_bundle/BUILD
+++ b/tensorflow/contrib/session_bundle/BUILD
@@ -234,7 +234,7 @@ cc_library(
 
 cc_test(
     name = "session_bundle_test",
-    size = "small",
+    size = "medium",
     srcs = ["session_bundle_test.cc"],
     data = [":session_bundle_half_plus_two"],
     # Link in all registered kernels.
diff --git a/tensorflow/contrib/session_bundle/session_bundle_test.cc b/tensorflow/contrib/session_bundle/session_bundle_test.cc
index eb36d79e0f..6d997bac9e 100644
--- a/tensorflow/contrib/session_bundle/session_bundle_test.cc
+++ b/tensorflow/contrib/session_bundle/session_bundle_test.cc
@@ -171,7 +171,8 @@ void BasicTest(const string& export_path) {
 // SessionBundles. Concurrent with adding this test, we had a leak where the
 // TensorFlow Session was not being closed, which leaked memory.
 // TODO(b/31711147): Increase the SessionBundle ResourceLeakTest iterations and
-// move outside of the test suite.
+// move outside of the test suite; decrease test size back to small at the same
+// time.
 TEST(LoadSessionBundleFromPath, ResourceLeakTest) {
   const string export_path = test_util::TestSrcDirPath(kExportPath);
   for (int i = 0; i < 100; i++) {
diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD
index bc30502264..527deab86a 100644
--- a/tensorflow/contrib/summary/BUILD
+++ b/tensorflow/contrib/summary/BUILD
@@ -22,10 +22,12 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":summary_ops",
+        "//tensorflow/core:protos_all_py",
         "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:lib",
         "//tensorflow/python:platform",
         "//tensorflow/python:training",
-        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:function",
         "//tensorflow/python/eager:test",
     ],
 )
@@ -38,6 +40,7 @@ py_library(
     deps = [
         ":gen_summary_ops",
         "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:summary_op_util",
diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py
index 05e627adf1..ceaf83b70a 100644
--- a/tensorflow/contrib/summary/summary_ops.py
+++ b/tensorflow/contrib/summary/summary_ops.py
@@ -68,7 +68,8 @@ def never_record_summaries():
 def create_summary_file_writer(logdir,
                                max_queue=None,
                                flush_secs=None,
-                               filename_suffix=None):
+                               filename_suffix=None,
+                               name=None):
   """Creates a summary file writer in the current context."""
   if max_queue is None:
     max_queue = constant_op.constant(10)
@@ -76,7 +77,7 @@ def create_summary_file_writer(logdir,
     flush_secs = constant_op.constant(120)
   if filename_suffix is None:
     filename_suffix = constant_op.constant("")
-  resource = gen_summary_ops.summary_writer()
+  resource = gen_summary_ops.summary_writer(shared_name=name)
   gen_summary_ops.create_summary_file_writer(resource, logdir, max_queue,
                                              flush_secs, filename_suffix)
   context.context().summary_writer_resource = resource
@@ -84,76 +85,87 @@ def create_summary_file_writer(logdir,
 
 def _nothing():
   """Convenient else branch for when summaries do not record."""
-  return
+  return False
 
 
-def generic(name, tensor, metadata, family=None):
-  """Writes a tensor summary if possible."""
+def summary_writer_function(name, tensor, function, family=None):
+  """Helper function to write summaries.
 
+  Args:
+    name: name of the summary
+    tensor: main tensor to form the summary
+    function: function taking a tag and a scope which writes the summary
+    family: optional, the summary's family
+
+  Returns:
+    The result of writing the summary.
+  """
   def record():
     with summary_op_util.summary_scope(
         name, family, values=[tensor]) as (tag, scope):
-      gen_summary_ops.write_summary(context.context().summary_writer_resource,
-                                    training_util.get_global_step(), tensor,
-                                    tag, metadata, name=scope)
+      function(tag, scope)
+      return True
+
   return control_flow_ops.cond(should_record_summaries(), record, _nothing)
 
 
+def generic(name, tensor, metadata, family=None):
+  """Writes a tensor summary if possible."""
+
+  def function(tag, scope):
+    gen_summary_ops.write_summary(context.context().summary_writer_resource,
+                                  training_util.get_global_step(), tensor,
+                                  tag, metadata, name=scope)
+  return summary_writer_function(name, tensor, function, family=family)
+
+
 def scalar(name, tensor, family=None):
   """Writes a scalar summary if possible."""
 
-  def record():
-    with summary_op_util.summary_scope(
-        name, family, values=[tensor]) as (tag, scope):
-      gen_summary_ops.write_scalar_summary(
-          context.context().summary_writer_resource,
-          training_util.get_global_step(), tag, tensor, name=scope)
+  def function(tag, scope):
+    gen_summary_ops.write_scalar_summary(
+        context.context().summary_writer_resource,
+        training_util.get_global_step(), tag, tensor, name=scope)
 
-  return control_flow_ops.cond(should_record_summaries(), record, _nothing)
+  return summary_writer_function(name, tensor, function, family=family)
 
 
 def histogram(name, tensor, family=None):
   """Writes a histogram summary if possible."""
 
-  def record():
-    with summary_op_util.summary_scope(
-        name, family, values=[tensor]) as (tag, scope):
-      gen_summary_ops.write_histogram_summary(
-          context.context().summary_writer_resource,
-          training_util.get_global_step(), tag, tensor, name=scope)
+  def function(tag, scope):
+    gen_summary_ops.write_histogram_summary(
+        context.context().summary_writer_resource,
+        training_util.get_global_step(), tag, tensor, name=scope)
 
-  return control_flow_ops.cond(should_record_summaries(), record, _nothing)
+  return summary_writer_function(name, tensor, function, family=family)
 
 
 def image(name, tensor, bad_color=None, max_images=3, family=None):
   """Writes an image summary if possible."""
 
-  def record():
+  def function(tag, scope):
     if bad_color is None:
       bad_color_ = constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8)
-    with summary_op_util.summary_scope(
-        name, family, values=[tensor]) as (tag, scope):
-      gen_summary_ops.write_image_summary(
-          context.context().summary_writer_resource,
-          training_util.get_global_step(), tag, tensor, bad_color_, max_images,
-          name=scope)
+    gen_summary_ops.write_image_summary(
+        context.context().summary_writer_resource,
+        training_util.get_global_step(), tag, tensor, bad_color_, max_images,
+        name=scope)
 
-  return control_flow_ops.cond(should_record_summaries(), record, _nothing)
+  return summary_writer_function(name, tensor, function, family=family)
 
 
 def audio(name, tensor, sample_rate, max_outputs, family=None):
   """Writes an audio summary if possible."""
 
-  def record():
-    with summary_op_util.summary_scope(
-        name, family, values=[tensor]) as (tag, scope):
-      gen_summary_ops.write_audio_summary(
-          context.context().summary_writer_resource,
-          training_util.get_global_step(),
-          tag,
-          tensor,
-          sample_rate=sample_rate,
-          max_outputs=max_outputs,
-          name=scope)
-
-  return control_flow_ops.cond(should_record_summaries(), record, _nothing)
+  def function(tag, scope):
+    gen_summary_ops.write_audio_summary(
+        context.context().summary_writer_resource,
+        training_util.get_global_step(),
+        tag,
+        tensor,
+        sample_rate=sample_rate,
+        max_outputs=max_outputs,
+        name=scope)
+
+  return summary_writer_function(name, tensor, function, family=family)
diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py
index 56c1a16f7f..4b1f60ce4e 100644
--- a/tensorflow/contrib/summary/summary_ops_test.py
+++ b/tensorflow/contrib/summary/summary_ops_test.py
@@ -17,11 +17,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 import tempfile
 
 from tensorflow.contrib.summary import summary_ops
+from tensorflow.core.util import event_pb2
+from tensorflow.python.eager import function
 from tensorflow.python.eager import test
 from tensorflow.python.framework import test_util
+from tensorflow.python.lib.io import tf_record
 from tensorflow.python.platform import gfile
 from tensorflow.python.training import training_util
 
@@ -36,7 +40,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   def testSummaryOps(self):
     training_util.get_or_create_global_step()
     logdir = tempfile.mkdtemp()
-    summary_ops.create_summary_file_writer(logdir, max_queue=0)
+    summary_ops.create_summary_file_writer(logdir, max_queue=0, name='t0')
     summary_ops.always_record_summaries()
     summary_ops.generic('tensor', 1, '')
     summary_ops.scalar('scalar', 2.0)
@@ -47,6 +51,27 @@ class TargetTest(test_util.TensorFlowTestCase):
     # test here that we're calling them correctly.
     self.assertTrue(gfile.Exists(logdir))
 
+  def testDefunSummarys(self):
+    training_util.get_or_create_global_step()
+    logdir = tempfile.mkdtemp()
+    summary_ops.create_summary_file_writer(logdir, max_queue=0, name='t1')
+    summary_ops.always_record_summaries()
+
+    @function.defun
+    def write():
+      summary_ops.scalar('scalar', 2.0)
+
+    write()
+
+    self.assertTrue(gfile.Exists(logdir))
+    files = gfile.ListDirectory(logdir)
+    self.assertEqual(len(files), 1)
+    records = list(tf_record.tf_record_iterator(os.path.join(logdir, files[0])))
+    self.assertEqual(len(records), 2)
+    event = event_pb2.Event()
+    event.ParseFromString(records[1])
+    self.assertEqual(event.summary.value[0].simple_value, 2.0)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/split_collection_operators.cc b/tensorflow/contrib/tensor_forest/kernels/v4/split_collection_operators.cc
index ccc412600c..e5d1beae7f 100644
--- a/tensorflow/contrib/tensor_forest/kernels/v4/split_collection_operators.cc
+++ b/tensorflow/contrib/tensor_forest/kernels/v4/split_collection_operators.cc
@@ -96,7 +96,12 @@ void SplitCollectionOperator::AddExample(
 }
 
 bool SplitCollectionOperator::IsInitialized(int32 node_id) const {
-  return stats_.at(node_id)->IsInitialized();
+  auto it = stats_.find(node_id);
+  if (it == stats_.end()) {
+    LOG(WARNING) << "IsInitialized called with unknown node_id = " << node_id;
+    return false;
+  }
+  return it->second->IsInitialized();
 }
 
 void SplitCollectionOperator::CreateAndInitializeCandidateWithExample(
diff --git a/tensorflow/contrib/tensorboard/plugins/trace/trace.py b/tensorflow/contrib/tensorboard/plugins/trace/trace.py
index 57f95dfce7..07e5316b8b 100644
--- a/tensorflow/contrib/tensorboard/plugins/trace/trace.py
+++ b/tensorflow/contrib/tensorboard/plugins/trace/trace.py
@@ -38,7 +38,7 @@ TOKENS = LEFT_TOKENS + RIGHT_TOKENS
 
 
 def store_trace_info(output_file_path,
-                     graph=ops.get_default_graph(),
+                     graph=None,
                      ignore_regex_fpaths=None):
   """Collects and stores trace information for a TensorFlow model.
 
@@ -51,6 +51,8 @@ def store_trace_info(output_file_path,
         in this list will be ignored. Defaults to patterns that match the core
         tensorflow python library.
   """
+  graph = graph or ops.get_default_graph()
+
   if not ignore_regex_fpaths:
     ignore_regex_fpaths = TF_LIB_REGEX_FPATHS
 
diff --git a/tensorflow/contrib/tpu/profiler/op_profile.proto b/tensorflow/contrib/tpu/profiler/op_profile.proto
index 6911b649a0..840a43913b 100644
--- a/tensorflow/contrib/tpu/profiler/op_profile.proto
+++ b/tensorflow/contrib/tpu/profiler/op_profile.proto
@@ -32,6 +32,18 @@ message Node {
     string expression = 2;  // %multiply = [shape]multiply(operand1, operand2)
     string provenance = 3;  // Typically the TensorFlow operation name.
     string category = 4;
+    // Describes the physical memory layout of the instruction's primary input.
+    // e.g. for a convolution, this analyzes the image and ignores the kernel.
+    LayoutAnalysis layout = 5;
+    message LayoutAnalysis {
+      // The physical data layout, from most-minor to most-major dimensions.
+      repeated Dimension dimensions = 1;
+      message Dimension {
+        int32 size = 1;       // Size of the data in this dimension.
+        int32 alignment = 2;  // Data must be padded to a multiple of alignment.
+        string semantics = 3;  // What the dimension represents, e.g. "spatial".
+      }
+    }
   }
 }
 
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 9db2ed830f..9319928307 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -2380,6 +2380,7 @@ tf_cc_tests(
         "util/semver_test.cc",
         "util/sparse/sparse_tensor_test.cc",
         "util/stat_summarizer_test.cc",
+        "util/tensor_format_test.cc",
         "util/tensor_slice_reader_test.cc",
         "util/tensor_slice_set_test.cc",
         "util/tensor_slice_util_test.cc",
diff --git a/tensorflow/core/common_runtime/simple_graph_execution_state.cc b/tensorflow/core/common_runtime/simple_graph_execution_state.cc
index 2a974d1840..363d3a0c9d 100644
--- a/tensorflow/core/common_runtime/simple_graph_execution_state.cc
+++ b/tensorflow/core/common_runtime/simple_graph_execution_state.cc
@@ -36,7 +36,6 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/util.h"
 
@@ -54,7 +53,6 @@ SimpleGraphExecutionState::SimpleGraphExecutionState(
     : stateful_placements_(options.stateful_placements),
       device_set_(options.device_set),
       session_options_(options.session_options),
-      costs_(true /*is_global*/),
       flib_def_(new FunctionLibraryDefinition(OpRegistry::Global(),
                                               graph_def->library())),
       graph_(nullptr) {
@@ -258,19 +256,11 @@ Status SimpleGraphExecutionState::InitBaseGraph(
   // Save stateful placements before placing.
   RestoreStatefulNodes(new_graph.get());
 
-  CostModel costs(true /*is_global*/);
-  {
-    mutex_lock l(mu_);
-    costs_.InitFromGraph(*new_graph);
-    costs.MergeFromGlobal(costs_);
-  }
-
   GraphOptimizationPassOptions optimization_options;
   optimization_options.session_options = session_options_;
   optimization_options.graph = &new_graph;
   optimization_options.flib_def = flib_def_.get();
   optimization_options.device_set = device_set_;
-  optimization_options.cost_model = &costs;
 
   TF_RETURN_IF_ERROR(OptimizationPassRegistry::Global()->RunGrouping(
       OptimizationPassRegistry::PRE_PLACEMENT, optimization_options));
@@ -420,14 +410,11 @@ Status SimpleGraphExecutionState::BuildGraph(
       new FunctionLibraryDefinition(*flib_def_));
 
   // TODO(andydavis): Clarify optimization pass requirements around CostModel.
-  CostModel costs(true /*is_global*/);
-  costs.MergeFromGlobal(costs_);
   GraphOptimizationPassOptions optimization_options;
   optimization_options.session_options = session_options_;
   optimization_options.graph = &ng;
   optimization_options.flib_def = flib.get();
   optimization_options.device_set = device_set_;
-  optimization_options.cost_model = &costs;
 
   TF_RETURN_IF_ERROR(OptimizationPassRegistry::Global()->RunGrouping(
       OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, optimization_options));
diff --git a/tensorflow/core/common_runtime/simple_graph_execution_state.h b/tensorflow/core/common_runtime/simple_graph_execution_state.h
index c7f34a42d6..53eef8a07d 100644
--- a/tensorflow/core/common_runtime/simple_graph_execution_state.h
+++ b/tensorflow/core/common_runtime/simple_graph_execution_state.h
@@ -25,19 +25,14 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/framework/graph.pb.h"
-#include "tensorflow/core/framework/step_stats.pb.h"
 #include "tensorflow/core/graph/costmodel.h"
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/macros.h"
-#include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 struct SessionOptions;
-class StepStats;
-class Timeline;
 
 namespace subgraph {
 struct RewriteGraphMetadata;
@@ -167,7 +162,6 @@ class SimpleGraphExecutionState {
   // Returns the map of stateful placements as a map of
   // node name to placement string.
   std::unordered_map GetStatefulPlacements() const {
-    mutex_lock l(mu_);
     return stateful_placements_;
   }
 
@@ -193,9 +187,6 @@ class SimpleGraphExecutionState {
   const DeviceSet* device_set_;            // Not owned
   const SessionOptions* session_options_;  // Not owned
 
-  mutable mutex mu_;
-  CostModel costs_ GUARDED_BY(mu_);
-
   // Map from name to Node for the full graph in placed_.
   NodeNameToCostIdMap node_name_to_cost_id_map_;
 
diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc
index 0e3ea2ddfb..ab21f47282 100644
--- a/tensorflow/core/framework/common_shape_fns.cc
+++ b/tensorflow/core/framework/common_shape_fns.cc
@@ -206,15 +206,28 @@ Status BiasAddGradShape(shape_inference::InferenceContext* c) {
 Status FusedConvBiasActivationShape(shape_inference::InferenceContext* c) {
   TF_RETURN_IF_ERROR(Conv2DShape(c));
 
-  ShapeHandle bias_shape;
-  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(2), 1, &bias_shape));
-  DimensionHandle bias_dim = c->Dim(bias_shape, 0);
+  string data_format_str, filter_format_str;
+  TF_RETURN_IF_ERROR(c->GetAttr("data_format", &data_format_str));
+  TF_RETURN_IF_ERROR(c->GetAttr("filter_format", &filter_format_str));
+
+  TensorFormat data_format;
+  FormatFromString(data_format_str, &data_format);
+  FilterTensorFormat filter_format;
+  FilterFormatFromString(filter_format_str, &filter_format);
 
+  constexpr int num_spatial_dims = 2;
+  const int rank = GetTensorDimsFromSpatialDims(num_spatial_dims, data_format);
   ShapeHandle filter_shape;
-  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 4, &filter_shape));
-  DimensionHandle output_depth_dim = c->Dim(filter_shape, 3);
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), rank, &filter_shape));
 
+  DimensionHandle output_depth_dim = c->Dim(
+      filter_shape, GetFilterDimIndex(filter_format, 'O'));
   int64 output_depth_dim_val = c->Value(output_depth_dim);
+
+  ShapeHandle bias_shape;
+  // Bias should be a 1-D tensor.
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &bias_shape));
+  DimensionHandle bias_dim = c->Dim(bias_shape, 0);
   int64 bias_dim_val = c->Value(bias_dim);
 
   if (output_depth_dim_val != bias_dim_val) {
@@ -223,6 +236,14 @@ Status FusedConvBiasActivationShape(shape_inference::InferenceContext* c) {
         ") and bias dimension (", bias_dim_val, ") do not match.");
   }
 
+  // Check side input shape matches the output shape.
+  ShapeHandle side_input_shape;
+  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(3), 1, &side_input_shape));
+  if (c->Rank(side_input_shape) > 1) {
+    ShapeHandle unused;
+    TF_RETURN_IF_ERROR(c->Merge(side_input_shape, c->output(0), &unused));
+  }
+
   return Status::OK();
 }
 
@@ -323,24 +344,38 @@ Status ShapeFromDimensions(DimensionHandle batch_dim,
 }
 
 Status Conv2DShape(shape_inference::InferenceContext* c) {
-  string data_format_str;
-  Status s = c->GetAttr("data_format", &data_format_str);
-  if (!s.ok()) {
+  string data_format_str, filter_format_str;
+  if (!c->GetAttr("data_format", &data_format_str).ok()) {
     data_format_str = "NHWC";
   }
+  if (!c->GetAttr("filter_format", &filter_format_str).ok()) {
+    filter_format_str = "HWIO";
+  }
 
   TensorFormat data_format;
   if (!FormatFromString(data_format_str, &data_format)) {
     return errors::InvalidArgument("Invalid data format string: ",
                                    data_format_str);
   }
+  FilterTensorFormat filter_format;
+  if (!FilterFormatFromString(filter_format_str, &filter_format)) {
+    return errors::InvalidArgument("Invalid filter format string: ",
+                                   filter_format_str);
+  }
+
+  constexpr int num_spatial_dims = 2;
+  const int rank = GetTensorDimsFromSpatialDims(num_spatial_dims, data_format);
+  ShapeHandle conv_input_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), rank, &conv_input_shape));
+  TF_RETURN_IF_ERROR(CheckFormatConstraintsOnShape(
+      data_format, conv_input_shape, "conv_input", c));
 
-  const int rank = GetTensorDimsFromSpatialDims(2, data_format);
-  ShapeHandle input_shape;
-  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), rank, &input_shape));
   // The filter rank should match the input (4 for NCHW, 5 for NCHW_VECT_C).
   ShapeHandle filter_shape;
   TF_RETURN_IF_ERROR(c->WithRank(c->input(1), rank, &filter_shape));
+  TF_RETURN_IF_ERROR(
+      CheckFormatConstraintsOnShape(data_format, filter_shape, "filter", c));
+
   std::vector strides;
   TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides));
 
@@ -352,38 +387,33 @@ Status Conv2DShape(shape_inference::InferenceContext* c) {
                                    strides.size());
   }
 
-  int32 stride_rows, stride_cols;
-  if (data_format == FORMAT_NCHW || data_format == FORMAT_NCHW_VECT_C) {
-    stride_rows = strides[2];
-    stride_cols = strides[3];
-  } else {
-    stride_rows = strides[1];
-    stride_cols = strides[2];
-  }
+  const int32 stride_rows = GetTensorDim(strides, data_format, 'H');
+  const int32 stride_cols = GetTensorDim(strides, data_format, 'W');
 
   DimensionHandle batch_size_dim;
   DimensionHandle input_depth_dim;
   gtl::InlinedVector input_spatial_dims(2);
-  TF_RETURN_IF_ERROR(DimensionsFromShape(input_shape, data_format,
+  TF_RETURN_IF_ERROR(DimensionsFromShape(conv_input_shape, data_format,
                                          &batch_size_dim, &input_spatial_dims,
                                          &input_depth_dim, c));
 
-  DimensionHandle output_depth_dim, filter_rows_dim, filter_cols_dim,
-      filter_input_depth_dim;
-  // If the input format is NCHW_VECT_C, the filter format is assumed to be
-  // OIHW_VECT_I, otherwise it is assumed to be HWIO.
-  if (data_format == FORMAT_NCHW_VECT_C) {
-    output_depth_dim = c->Dim(filter_shape, 0);
-    TF_RETURN_IF_ERROR(c->Multiply(c->Dim(filter_shape, 1),
-                                   c->Dim(filter_shape, 4),
-                                   &filter_input_depth_dim));
-    filter_rows_dim = c->Dim(filter_shape, 2);
-    filter_cols_dim = c->Dim(filter_shape, 3);
+  DimensionHandle output_depth_dim = c->Dim(
+      filter_shape, GetFilterDimIndex(filter_format, 'O'));
+  DimensionHandle filter_rows_dim = c->Dim(
+      filter_shape, GetFilterDimIndex(filter_format, 'H'));
+  DimensionHandle filter_cols_dim = c->Dim(
+      filter_shape, GetFilterDimIndex(filter_format, 'W'));
+  DimensionHandle filter_input_depth_dim;
+  if (filter_format == FORMAT_OIHW_VECT_I) {
+    TF_RETURN_IF_ERROR(c->Multiply(
+        c->Dim(filter_shape,
+               GetFilterDimIndex(filter_format, 'I')),
+        c->Dim(filter_shape,
+               GetFilterTensorInnerInputChannelsDimIndex(rank, filter_format)),
+        &filter_input_depth_dim));
   } else {
-    filter_rows_dim = c->Dim(filter_shape, 0);
-    filter_cols_dim = c->Dim(filter_shape, 1);
-    filter_input_depth_dim = c->Dim(filter_shape, 2);
-    output_depth_dim = c->Dim(filter_shape, 3);
+    filter_input_depth_dim = c->Dim(
+        filter_shape, GetFilterDimIndex(filter_format, 'I'));
   }
 
   // Check that the input tensor and the filter tensor agree on the input
@@ -559,9 +589,6 @@ Status DepthwiseConv2DNativeShape(shape_inference::InferenceContext* c) {
 }
 
 Status AvgPoolShape(shape_inference::InferenceContext* c) {
-  ShapeHandle input_shape;
-  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 4, &input_shape));
-
   string data_format_str;
   TensorFormat data_format;
   Status s = c->GetAttr("data_format", &data_format_str);
@@ -571,6 +598,10 @@ Status AvgPoolShape(shape_inference::InferenceContext* c) {
     data_format = FORMAT_NHWC;
   }
 
+  const int rank = (data_format == FORMAT_NCHW_VECT_C) ? 5 : 4;
+  ShapeHandle input_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), rank, &input_shape));
+
   TF_RETURN_IF_ERROR(
       CheckFormatConstraintsOnShape(data_format, input_shape, "input", c));
 
@@ -627,9 +658,6 @@ Status AvgPoolShape(shape_inference::InferenceContext* c) {
 }
 
 Status MaxPoolShape(shape_inference::InferenceContext* c) {
-  ShapeHandle input_shape;
-  TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 4, &input_shape));
-
   string data_format_str;
   TensorFormat data_format;
   Status s = c->GetAttr("data_format", &data_format_str);
@@ -639,6 +667,10 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) {
     data_format = FORMAT_NHWC;
   }
 
+  const int rank = (data_format == FORMAT_NCHW_VECT_C) ? 5 : 4;
+  ShapeHandle input_shape;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), rank, &input_shape));
+
   TF_RETURN_IF_ERROR(
       CheckFormatConstraintsOnShape(data_format, input_shape, "input", c));
 
@@ -696,11 +728,21 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) {
 }
 
 Status MaxPoolV2Shape(shape_inference::InferenceContext* c, int num_inputs) {
+  string data_format_str;
+  TensorFormat data_format;
+  Status s = c->GetAttr("data_format", &data_format_str);
+  if (s.ok()) {
+    FormatFromString(data_format_str, &data_format);
+  } else {
+    data_format = FORMAT_NHWC;
+  }
+
+  const int rank = (data_format == FORMAT_NCHW_VECT_C) ? 5 : 4;
   ShapeHandle input_shape;
-  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input_shape));
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), rank, &input_shape));
 
-  string data_format;
-  Status s = c->GetAttr("data_format", &data_format);
+  TF_RETURN_IF_ERROR(
+      CheckFormatConstraintsOnShape(data_format, input_shape, "input", c));
 
   std::vector kernel_sizes;
   std::vector strides;
@@ -725,7 +767,8 @@ Status MaxPoolV2Shape(shape_inference::InferenceContext* c, int num_inputs) {
     }
     kernel_sizes.resize(kernel_sizes_tensor->shape().num_elements());
     auto kernel_sizes_vec = kernel_sizes_tensor->flat();
-    std::copy_n(&kernel_sizes_vec(0), kernel_sizes.size(), kernel_sizes.begin());
+    std::copy_n(&kernel_sizes_vec(0), kernel_sizes.size(),
+                kernel_sizes.begin());
 
     const Tensor* strides_tensor = c->input_tensor(c->num_inputs() - 1);
     if (strides_tensor == nullptr) {
@@ -749,35 +792,22 @@ Status MaxPoolV2Shape(shape_inference::InferenceContext* c, int num_inputs) {
         kernel_sizes.size());
   }
 
-  int32 stride_rows, stride_cols, stride_depth;
-  int32 kernel_rows, kernel_cols, kernel_depth;
-
-  if (s.ok() && data_format == "NCHW") {
-    // Canonicalize input shape to NHWC so the shape inference code below can
-    // process it.
-    auto dim = [&](char dimension) {
-      return c->Dim(input_shape, GetTensorDimIndex<2>(FORMAT_NCHW, dimension));
-    };
-    input_shape = c->MakeShape({{dim('N'), dim('0'), dim('1'), dim('C')}});
-    stride_depth = strides[1];
-    stride_rows = strides[2];
-    stride_cols = strides[3];
-    kernel_depth = kernel_sizes[1];
-    kernel_rows = kernel_sizes[2];
-    kernel_cols = kernel_sizes[3];
-  } else {
-    stride_rows = strides[1];
-    stride_cols = strides[2];
-    stride_depth = strides[3];
-    kernel_rows = kernel_sizes[1];
-    kernel_cols = kernel_sizes[2];
-    kernel_depth = kernel_sizes[3];
-  }
+  int32 stride_depth = GetTensorDim(strides, data_format, 'C');
+  int32 stride_rows = GetTensorDim(strides, data_format, 'H');
+  int32 stride_cols = GetTensorDim(strides, data_format, 'W');
+  int32 kernel_depth = GetTensorDim(kernel_sizes, data_format, 'C');
+  int32 kernel_rows = GetTensorDim(kernel_sizes, data_format, 'H');
+  int32 kernel_cols = GetTensorDim(kernel_sizes, data_format, 'W');
 
-  DimensionHandle batch_size_dim = c->Dim(input_shape, 0);
-  DimensionHandle in_rows_dim = c->Dim(input_shape, 1);
-  DimensionHandle in_cols_dim = c->Dim(input_shape, 2);
-  DimensionHandle in_depth_dim = c->Dim(input_shape, 3);
+  constexpr int num_spatial_dims = 2;
+  DimensionHandle batch_size_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'N'));
+  DimensionHandle in_rows_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'H'));
+  DimensionHandle in_cols_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'W'));
+  DimensionHandle in_depth_dim = c->Dim(
+      input_shape, GetTensorDimIndex(data_format, 'C'));
 
   Padding padding;
   TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding));
@@ -791,15 +821,9 @@ Status MaxPoolV2Shape(shape_inference::InferenceContext* c, int num_inputs) {
   TF_RETURN_IF_ERROR(GetWindowedOutputSizeFromDims(
       c, in_depth_dim, kernel_depth, stride_depth, padding, &output_depth));
 
-  output_shape =
-      c->MakeShape({batch_size_dim, output_rows, output_cols, output_depth});
-  if (data_format == "NCHW") {
-    // Convert output shape back to expected NCHW data format.
-    auto dim = [&](char dimension) {
-      return c->Dim(output_shape, GetTensorDimIndex<2>(FORMAT_NHWC, dimension));
-    };
-    output_shape = c->MakeShape({{dim('N'), dim('C'), dim('0'), dim('1')}});
-  }
+  TF_RETURN_IF_ERROR(MakeShapeFromFormat(data_format, batch_size_dim,
+                                         {output_rows, output_cols},
+                                         output_depth, &output_shape, c));
 
   c->set_output(0, output_shape);
   return Status::OK();
diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc
index 14f6c1bb45..ec9746b2af 100644
--- a/tensorflow/core/framework/common_shape_fns_test.cc
+++ b/tensorflow/core/framework/common_shape_fns_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/framework/common_shape_fns.h"
 
+#include "tensorflow/core/framework/fake_input.h"
 #include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/framework/op_def_builder.h"
 #include "tensorflow/core/framework/shape_inference_testutil.h"
@@ -411,34 +412,35 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) {
 TEST(CommonShapeFnsTest, Conv2DShapeTest) {
   ShapeInferenceTestOp op("Conv2D");
   auto set_op = [&op](const std::vector& strides, const string& padding,
-                      const string& data_format) {
+                      const string& data_format, const string& filter_format) {
     TF_CHECK_OK(NodeDefBuilder("test", "Conv2D")
                     .Input("input", 0, DT_FLOAT)
                     .Input("filter", 0, DT_FLOAT)
                     .Attr("strides", strides)
                     .Attr("padding", padding)
                     .Attr("data_format", data_format)
+                    .Attr("filter_format", filter_format)
                     .Finalize(&op.node_def));
   };
 
   // 1x1 filter
-  set_op({{1, 1, 1, 1}}, "VALID", "NHWC");
+  set_op({{1, 1, 1, 1}}, "VALID", "NHWC", "HWIO");
   INFER_OK(op, "[1,2,2,1];[1,1,1,1]", "[d0_0,2,2,d1_3]");
 
   // 2x2 filter
-  set_op({{1, 1, 1, 1}}, "VALID", "NHWC");
+  set_op({{1, 1, 1, 1}}, "VALID", "NHWC", "HWIO");
   INFER_OK(op, "[1,2,2,1];[2,2,1,1]", "[d0_0,1,1,d1_3]");
 
   // 3x3 input, 1x1 filter, 2x2 stride
-  set_op({{1, 2, 2, 1}}, "VALID", "NHWC");
+  set_op({{1, 2, 2, 1}}, "VALID", "NHWC", "HWIO");
   INFER_OK(op, "[1,3,3,1];[1,1,1,1]", "[d0_0,2,2,d1_3]");
 
   // 3x3 input, 1x1 filter, 2x1 stride
-  set_op({{1, 2, 1, 1}}, "VALID", "NHWC");
+  set_op({{1, 2, 1, 1}}, "VALID", "NHWC", "HWIO");
   INFER_OK(op, "[1,3,3,1];[1,1,1,1]", "[d0_0,2,3,d1_3]");
 
   // 4x4 input, 2x1 filter, 1x2 stride
-  set_op({{1, 1, 2, 1}}, "VALID", "NHWC");
+  set_op({{1, 1, 2, 1}}, "VALID", "NHWC", "HWIO");
   INFER_OK(op, "[1,4,4,1];[2,1,1,1]", "[d0_0,3,2,d1_3]");
 
   // Invalid rank for input
@@ -460,77 +462,76 @@ TEST(CommonShapeFnsTest, Conv2DShapeTest) {
 
   // Tests for NCHW
   // 1x1 filter
-  set_op({{1, 1, 1, 1}}, "VALID", "NCHW");
+  set_op({{1, 1, 1, 1}}, "VALID", "NCHW", "HWIO");
   INFER_OK(op, "[1,1,2,2];[1,1,1,1]", "[d0_0,d1_3,2,2]");
 
   // 2x2 filter
-  set_op({{1, 1, 1, 1}}, "VALID", "NCHW");
+  set_op({{1, 1, 1, 1}}, "VALID", "NCHW", "HWIO");
   INFER_OK(op, "[1,1,2,2];[2,2,1,1]", "[d0_0,d1_3,1,1]");
 
   // 3x3 input, 1x1 filter, 2x2 stride
-  set_op({{1, 1, 2, 2}}, "VALID", "NCHW");
+  set_op({{1, 1, 2, 2}}, "VALID", "NCHW", "HWIO");
   INFER_OK(op, "[1,1,3,3];[1,1,1,1]", "[d0_0,d1_3,2,2]");
 
   // 3x3 input, 1x1 filter, 2x1 stride
-  set_op({{1, 1, 2, 1}}, "VALID", "NCHW");
+  set_op({{1, 1, 2, 1}}, "VALID", "NCHW", "HWIO");
   INFER_OK(op, "[1,1,3,3];[1,1,1,1]", "[d0_0,d1_3,2,3]");
 
   // 4x4 input, 2x1 filter, 1x2 stride
-  set_op({{1, 1, 1, 2}}, "VALID", "NCHW");
+  set_op({{1, 1, 1, 2}}, "VALID", "NCHW", "HWIO");
   INFER_OK(op, "[1,1,4,4];[2,1,1,1]", "[d0_0,d1_3,3,2]");
 
   // Tests for NCHW_VECT_C
   // 1x1 filter
-  set_op({{1, 1, 1, 1}}, "VALID", "NCHW_VECT_C");
+  set_op({{1, 1, 1, 1}}, "VALID", "NCHW_VECT_C", "OIHW_VECT_I");
   INFER_OK(op, "[1,1,2,2,4];[4,1,1,1,4]", "[d0_0,1,2,2,4]");
 
   // 2x2 filter
-  set_op({{1, 1, 1, 1}}, "VALID", "NCHW_VECT_C");
+  set_op({{1, 1, 1, 1}}, "VALID", "NCHW_VECT_C", "OIHW_VECT_I");
   INFER_OK(op, "[1,1,2,2,4];[4,1,2,2,4]", "[d0_0,1,1,1,4]");
 
   // 3x3 input, 1x1 filter, 2x2 stride
-  set_op({{1, 1, 2, 2}}, "VALID", "NCHW_VECT_C");
+  set_op({{1, 1, 2, 2}}, "VALID", "NCHW_VECT_C", "OIHW_VECT_I");
   INFER_OK(op, "[1,1,3,3,4];[8,1,1,1,4]", "[d0_0,2,2,2,4]");
 
   // 3x3 input, 1x1 filter, 2x1 stride
-  set_op({{1, 1, 2, 1}}, "VALID", "NCHW_VECT_C");
+  set_op({{1, 1, 2, 1}}, "VALID", "NCHW_VECT_C", "OIHW_VECT_I");
   INFER_OK(op, "[1,1,3,3,4];[4,1,1,1,4]", "[d0_0,1,2,3,4]");
 
   // 4x4 input, 2x1 filter, 1x2 stride
-  set_op({{1, 1, 1, 2}}, "VALID", "NCHW_VECT_C");
+  set_op({{1, 1, 1, 2}}, "VALID", "NCHW_VECT_C", "OIHW_VECT_I");
   INFER_OK(op, "[1,1,4,4,4];[4,1,2,1,4]", "[d0_0,1,3,2,4]");
 
   // Some tests for "SAME" padding
 
   // 4x4 input, 1x1 filter, 1x1 stride
-  set_op({{1, 1, 1, 1}}, "SAME", "NHWC");
+  set_op({{1, 1, 1, 1}}, "SAME", "NHWC", "HWIO");
   INFER_OK(op, "[1,4,4,1];[1,1,1,1]", "[d0_0,d0_1,d0_2,d1_3]");
 
   // 3x3 input, 2x2 filter, 1x1 stride
-  set_op({{1, 1, 1, 1}}, "SAME", "NHWC");
+  set_op({{1, 1, 1, 1}}, "SAME", "NHWC", "HWIO");
   INFER_OK(op, "[1,3,3,1];[2,2,1,1]", "[d0_0,d0_1,d0_2,d1_3]");
 
   // 4x4 input, 2x2 filter, 2x2 stride
-  set_op({{1, 2, 2, 1}}, "SAME", "NHWC");
+  set_op({{1, 2, 2, 1}}, "SAME", "NHWC", "HWIO");
   INFER_OK(op, "[1,4,4,1];[2,2,1,1]", "[d0_0,2,2,d1_3]");
 
   // 4x4 input, 2x2 filter, 1x1 stride
-  set_op({{1, 1, 1, 1}}, "SAME", "NHWC");
+  set_op({{1, 1, 1, 1}}, "SAME", "NHWC", "HWIO");
   INFER_OK(op, "[1,4,4,1];[2,2,1,1]", "[d0_0,d0_1,d0_2,d1_3]");
 
   // With stride 1x1 and SAME, unknown dims don't matter - filter dims except
   // for output channels are ignored for output, so all inputs are carried
   // through to output.
-  set_op({{1, 1, 1, 1}}, "SAME", "NHWC");
+  set_op({{1, 1, 1, 1}}, "SAME", "NHWC", "HWIO");
   INFER_OK(op, "[1,4,4,1];[?,?,?,?]", "[d0_0,d0_1,d0_2,d1_3]");
   INFER_OK(op, "[1,?,4,1];[?,?,?,?]", "[d0_0,d0_1,d0_2,d1_3]");
   INFER_OK(op, "[1,4,?,1];[?,?,?,?]", "[d0_0,d0_1,d0_2,d1_3]");
   INFER_OK(op, "[1,4,4,?];[?,?,?,?]", "[d0_0,d0_1,d0_2,d1_3]");
-  INFER_OK(op, "[1,4,4,1];[?,?,?,?]", "[d0_0,d0_1,d0_2,d1_3]");
-  INFER_OK(op, "[1,4,4,1];[?,?,?,?]", "[d0_0,d0_1,d0_2,d1_3]");
+  INFER_OK(op, "[?,4,4,1];[?,?,?,?]", "[d0_0,d0_1,d0_2,d1_3]");
 
   // With stride != 1, the input HW dims are divided to produce output dims.
-  set_op({{1, 2, 2, 1}}, "SAME", "NHWC");
+  set_op({{1, 2, 2, 1}}, "SAME", "NHWC", "HWIO");
   INFER_OK(op, "[?,4,4,1];[?,?,?,?]", "[d0_0,2,2,d1_3]");
   INFER_OK(op, "[1,?,4,1];[?,?,?,?]", "[d0_0,?,2,d1_3]");
   INFER_OK(op, "[1,4,?,1];[?,?,?,?]", "[d0_0,2,?,d1_3]");
@@ -704,7 +705,7 @@ TEST(CommonShapeFnsTest, AvgPool2DShapeTest) {
   INFER_ERROR("Dimension must be 4 but is 3", op, "[2,5,7,11,3]");
 
   // Invalid rank for input
-  INFER_ERROR("must be at least rank 4", op, "[4,4]");
+  INFER_ERROR("Shape must be rank", op, "[4,4]");
 }
 
 TEST(CommonShapeFnsTest, MaxPool2DShapeTest) {
@@ -741,6 +742,48 @@ TEST(CommonShapeFnsTest, MaxPool2DShapeTest) {
   INFER_ERROR("Dimension must be 4 but is 8", op, "[2,3,5,7,8]");
 }
 
+TEST(CommonShapeFnsTest, MaxPoolV22DShapeTest) {
+  ShapeInferenceTestOp op("MaxPoolV2");
+  Tensor ksizes_tensor, strides_tensor;
+  auto set_op = [&op, &ksizes_tensor, &strides_tensor](
+                    const std::vector& strides,
+                    const std::vector& ksizes, const string& padding,
+                    const string& data_format) {
+    TF_CHECK_OK(NodeDefBuilder("test", "MaxPoolV2")
+                    .Input("input", 0, DT_FLOAT)
+                    .Input("ksize", 1, DT_INT32)
+                    .Input("strides", 2, DT_INT32)
+                    .Attr("padding", padding)
+                    .Attr("data_format", data_format)
+                    .Finalize(&op.node_def));
+    ksizes_tensor = test::AsTensor(ksizes);
+    op.input_tensors.resize(3);
+    op.input_tensors[0] = nullptr;
+    op.input_tensors[1] = &ksizes_tensor;
+    strides_tensor = test::AsTensor(strides);
+    op.input_tensors[2] = &strides_tensor;
+  };
+
+  // Most of the functionality is tested by conv-like shapes,
+  // so we check the very-specific maxpooling features here,
+  // namely depthwise kernel and striding.
+
+  // all 1 strides, depth 2 filter
+  set_op({1, 1, 1, 1}, {1, 1, 1, 2}, "VALID", "NHWC");
+  INFER_OK(op, "[1,2,2,2];[4];[4]", "[d0_0,2,2,1]");
+
+  // depth 3 stride, 1x1x1 filter, NCHW
+  set_op({1, 3, 1, 1}, {1, 1, 1, 1}, "VALID", "NCHW");
+  INFER_OK(op, "[1,7,5,5];[4];[4]", "[d0_0,3,5,5]");
+
+  // 5x7 input, 2x2 ksize, 1x1 stride, NCHW_VECT_C tests
+  set_op({{1, 1, 1, 1}}, {1, 1, 2, 2}, "SAME", "NCHW_VECT_C");
+  INFER_OK(op, "[2,3,5,7,4];[4];[4]", "[d0_0,d0_1,d0_2,d0_3,4]");
+  INFER_OK(op, "[5,7,?,?,4];[4];[4]", "[d0_0,d0_1,d0_2,d0_3,4]");
+  INFER_OK(op, "[?,?,?,?,4];[4];[4]", "[d0_0,d0_1,d0_2,d0_3,4]");
+  INFER_ERROR("Dimension must be 4 but is 8", op, "[2,3,5,7,8];[4];[4]");
+}
+
 TEST(CommonShapeFnsTest, Pool3DShapeTest) {
   ShapeInferenceTestOp op("MaxPool3D");
   auto set_op = [&op](const std::vector& strides,
diff --git a/tensorflow/core/framework/summary.proto b/tensorflow/core/framework/summary.proto
index ba49033331..55879f8783 100644
--- a/tensorflow/core/framework/summary.proto
+++ b/tensorflow/core/framework/summary.proto
@@ -42,7 +42,7 @@ message SummaryMetadata {
 
     // The content to store for the plugin. The best practice is for this to be
     // a binary serialized protocol buffer.
-    string content = 2;
+    bytes content = 2;
   }
 
   // Data that associates a summary with a certain plugin.
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 4c79323197..cf5d6e8baa 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1205,12 +1205,12 @@ int MklLayoutRewritePass::SetUpContiguousInputs(
       if (do_connect_conv2d_backprop_input_filter &&
           iidx == kConv2DBackpropInputFilterInputSlotIdx) {
         GetNodeProducingMklTensor(g, old_node, conv2d_node,
-                                  kConv2DFilterOutputSlotIdx,
-                                  &mkl_node, &mkl_node_output_slot);
+                                  kConv2DFilterOutputSlotIdx, &mkl_node,
+                                  &mkl_node_output_slot);
       } else {
         GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first,
-                                  old_node_inputs[iidx].second,
-                                  &mkl_node, &mkl_node_output_slot);
+                                  old_node_inputs[iidx].second, &mkl_node,
+                                  &mkl_node_output_slot);
       }
       nb->Input(mkl_node, mkl_node_output_slot);
       iidx++;
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index efc5d7c553..32a1b2c84d 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -20,6 +20,7 @@ package_group(
     packages = [
         "//learning/brain/contrib/...",
         "//learning/brain/research/sparse_matrix/...",
+        "//learning/faster_training/...",
         "//tensorflow/...",
     ],
 )
@@ -3350,10 +3351,9 @@ tf_cc_test(
     srcs = ["parse_tensor_test.cc"],
     deps = [
         ":ops_testutil",
-        ":ops_util",
         ":parse_tensor_op",
+        "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
-        "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
     ],
@@ -5587,6 +5587,20 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "dataset_utils",
+    srcs = ["dataset_utils.cc"],
+    hdrs = ["dataset_utils.h"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core/util/tensor_bundle",
+    ],
+)
+
 cc_library(
     name = "captured_function",
     srcs = ["captured_function.cc"],
@@ -5713,6 +5727,7 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -5727,6 +5742,22 @@ tf_kernel_library(
     deps = [
         ":captured_function",
         ":dataset",
+        ":dataset_utils",
+        "//tensorflow/core:core_cpu_internal",
+        "//tensorflow/core:dataset_ops_op_lib",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+    ],
+)
+
+tf_kernel_library(
+    name = "sloppy_interleave_dataset_op",
+    srcs = ["sloppy_interleave_dataset_op.cc"],
+    deps = [
+        ":captured_function",
+        ":dataset",
+        ":dataset_utils",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:dataset_ops_op_lib",
         "//tensorflow/core:framework",
@@ -5963,6 +5994,7 @@ tf_kernel_library(
         ":repeat_dataset_op",
         ":shuffle_dataset_op",
         ":skip_dataset_op",
+        ":sloppy_interleave_dataset_op",
         ":sparse_tensor_slice_dataset_op",
         ":sql_dataset_ops",
         ":take_dataset_op",
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index 168cf37bc7..c852dc9991 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -92,11 +92,11 @@ class ConvParameters {
   ConvParameters(int64 batch, int64 in_depths, const SpatialArray& in,
                  int64 out_depths, const SpatialArray& filter,
                  const SpatialArray& stride, const SpatialArray& padding,
-                 const DataType& dtype, int device_id)
+                 DataType dtype, int device_id)
       : batch_(batch),
         in_depths_(in_depths),
-        in_(in),
         out_depths_(out_depths),
+        in_(in),
         filter_(filter),
         stride_(stride),
         padding_(padding),
@@ -130,7 +130,8 @@ class ConvParameters {
         "(", str_util::Join(filter_, ", "), "), ",
         "(", str_util::Join(stride_, ", "), "), ",
         "(", str_util::Join(padding_, ", "), "), ",
-        dtype_, ", ", device_id_);
+        dtype_, ", ",
+        device_id_);
     // clang-format on
   }
 
@@ -150,26 +151,28 @@ class ConvParameters {
     }
   }
 
- private:
-  typedef std::tuple
-      ParameterDataType;
+ protected:
+  using ParameterDataType =
+      std::tuple;
 
   ParameterDataType get_data_as_tuple() const {
     return std::make_tuple(batch_, in_depths_, in_, out_depths_, filter_,
                            stride_, padding_, dtype_, device_id_);
   }
 
+  uint64 hash_code_;
+
+ private:
   int64 batch_;
   int64 in_depths_;
-  SpatialArray in_;
   int64 out_depths_;
+  SpatialArray in_;
   SpatialArray filter_;
   SpatialArray stride_;
   SpatialArray padding_;
   DataType dtype_;
   int device_id_;
-  uint64 hash_code_;
 };
 
 typedef Eigen::GpuDevice GPUDevice;
diff --git a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
index 2307c2de0e..3d4670c9ba 100644
--- a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
+++ b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc
@@ -556,6 +556,7 @@ template struct functor::NCHWToNHWC;
 template struct functor::NCHWToNHWC;
 template struct functor::NCHWToNHWC;
 
+template struct functor::PadInput;
 template struct functor::PadInput;
 template struct functor::PadInput;
 
diff --git a/tensorflow/core/kernels/crop_and_resize_op.cc b/tensorflow/core/kernels/crop_and_resize_op.cc
index 56181a686c..45cc2fbbb8 100644
--- a/tensorflow/core/kernels/crop_and_resize_op.cc
+++ b/tensorflow/core/kernels/crop_and_resize_op.cc
@@ -19,59 +19,98 @@ limitations under the License.
 
 #include "tensorflow/core/kernels/crop_and_resize_op.h"
 
+#include 
+#include 
+
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/work_sharder.h"
 
 #if GOOGLE_CUDA
+#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
+#include "tensorflow/core/platform/cuda.h"
 #include "tensorflow/core/platform/stream_executor.h"
+
+using ::perftools::gputools::cuda::ScopedActivateExecutorContext;
 #endif  // GOOGLE_CUDA
 
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
+using Callback = std::function;
+
+namespace {
 
-static inline void ParseAndCheckBoxSizes(OpKernelContext* context,
-                                         const Tensor& boxes,
-                                         const Tensor& box_ind,
-                                         int* num_boxes) {
-  if (boxes.NumElements() == 0 && box_ind.NumElements() == 0) {
+static inline Status ParseAndCheckBoxSizes(const Tensor& boxes,
+                                           const Tensor& box_index,
+                                           int* num_boxes) {
+  if (boxes.NumElements() == 0 && box_index.NumElements() == 0) {
     *num_boxes = 0;
-    return;
+    return Status::OK();
   }
   // The shape of 'boxes' is [num_boxes, 4].
-  OP_REQUIRES(context, boxes.dims() == 2,
-              errors::InvalidArgument("boxes must be 2-D",
-                                      boxes.shape().DebugString()));
+  if (boxes.dims() != 2) {
+    return errors::InvalidArgument("boxes must be 2-D",
+                                   boxes.shape().DebugString());
+  }
   *num_boxes = boxes.dim_size(0);
-  OP_REQUIRES(context, boxes.dim_size(1) == 4,
-              errors::InvalidArgument("boxes must have 4 columns"));
-
-  // The shape of 'box_ind' is [num_boxes].
-  OP_REQUIRES(context, box_ind.dims() == 1,
-              errors::InvalidArgument("box_ind must be 1-D",
-                                      box_ind.shape().DebugString()));
-  OP_REQUIRES(context, box_ind.dim_size(0) == *num_boxes,
-              errors::InvalidArgument("box_ind has incompatible shape"));
+  if (boxes.dim_size(1) != 4) {
+    return errors::InvalidArgument("boxes must have 4 columns");
+  }
+  // The shape of 'box_index' is [num_boxes].
+  if (box_index.dims() != 1) {
+    return errors::InvalidArgument("box_index must be 1-D",
+                                   box_index.shape().DebugString());
+  }
+  if (box_index.dim_size(0) != *num_boxes) {
+    return errors::InvalidArgument("box_index has incompatible shape");
+  }
+  return Status::OK();
 }
 
-// Verifies that all values in box_ind are in [0, batch).
+// Conditionally calls the compute callback if all values in box_index are in
+// [0, batch_size) then calls done.
 template 
-inline void CheckValidBoxInd(
-    OpKernelContext* context,
-    typename TTypes::ConstTensor box_ind_data, int batch);
+inline void RunIfBoxIndexIsValid(
+    OpKernelContext* context, typename TTypes::ConstTensor box_index,
+    int batch_size, const Callback& compute, const Callback& done);
+
+// Specialization of CheckValidBoxIndex for a CPUDevice.
+template <>
+inline void RunIfBoxIndexIsValid(
+    OpKernelContext* context, typename TTypes::ConstTensor box_index,
+    int batch_size, const Callback& compute, const Callback& done) {
+  const int num_boxes = box_index.dimension(0);
+  for (int b = 0; b < num_boxes; ++b) {
+    OP_REQUIRES_ASYNC(
+        context, FastBoundsCheck(box_index(b), batch_size),
+        errors::OutOfRange("box_index has values outside [0, batch_size)"),
+        done);
+  }
+  if (compute) {
+    compute();
+  }
+  if (done) {
+    done();
+  }
+}
+
+}  // namespace
 
 template 
-class CropAndResizeOp : public OpKernel {
+class CropAndResizeOp : public AsyncOpKernel {
  public:
-  explicit CropAndResizeOp(OpKernelConstruction* context) : OpKernel(context) {
+  explicit CropAndResizeOp(OpKernelConstruction* context)
+      : AsyncOpKernel(context) {
     string method;
     OP_REQUIRES_OK(context, context->GetAttr("method", &method));
     OP_REQUIRES(context, method == "bilinear",
@@ -80,69 +119,77 @@ class CropAndResizeOp : public OpKernel {
                                              &extrapolation_value_));
   }
 
-  void Compute(OpKernelContext* context) override {
-    // The shape of 'image' is [batch, image_height, image_width, channels].
+  void ComputeAsync(OpKernelContext* context, DoneCallback done) override {
+    // The shape of 'image' is [batch_size, image_height, image_width,
+    // channels].
     const Tensor& image = context->input(0);
-    OP_REQUIRES(context, image.dims() == 4,
-                errors::InvalidArgument("input image must be 4-D",
-                                        image.shape().DebugString()));
-
-    const int batch = image.dim_size(0);
-    const int image_height = image.dim_size(1);
-    const int image_width = image.dim_size(2);
-    const int depth = image.dim_size(3);
-    OP_REQUIRES(context, image_height > 0 && image_width > 0,
-                errors::InvalidArgument("image dimensions must be positive"));
-
     // The shape of 'boxes' is [num_boxes, 4].
     const Tensor& boxes = context->input(1);
-
-    // The shape of 'box_ind' is [num_boxes].
-    const Tensor& box_ind = context->input(2);
-
-    int num_boxes = 0;
-    ParseAndCheckBoxSizes(context, boxes, box_ind, &num_boxes);
-
+    // The shape of 'box_index' is [num_boxes].
+    const Tensor& box_index = context->input(2);
     // The shape of 'crop_size' is [2].
     const Tensor& crop_size = context->input(3);
 
-    OP_REQUIRES(context, crop_size.dims() == 1,
-                errors::InvalidArgument("crop_size must be 1-D",
-                                        crop_size.shape().DebugString()));
-    OP_REQUIRES(context, crop_size.dim_size(0) == 2,
-                errors::InvalidArgument("crop_size must have two elements",
-                                        crop_size.shape().DebugString()));
-
+    // Validate inputs dimensions.
+    OP_REQUIRES_ASYNC(context, image.dims() == 4,
+                      errors::InvalidArgument("input image must be 4-D",
+                                              image.shape().DebugString()),
+                      done);
+    const int batch_size = image.dim_size(0);
+    const int image_height = image.dim_size(1);
+    const int image_width = image.dim_size(2);
+    const int depth = image.dim_size(3);
+    OP_REQUIRES_ASYNC(
+        context, image_height > 0 && image_width > 0,
+        errors::InvalidArgument("image dimensions must be positive"), done);
+    int num_boxes = 0;
+    OP_REQUIRES_OK_ASYNC(
+        context, ParseAndCheckBoxSizes(boxes, box_index, &num_boxes), done);
+
+    OP_REQUIRES_ASYNC(context, crop_size.dims() == 1,
+                      errors::InvalidArgument("crop_size must be 1-D",
+                                              crop_size.shape().DebugString()),
+                      done);
+    OP_REQUIRES_ASYNC(
+        context, crop_size.dim_size(0) == 2,
+        errors::InvalidArgument("crop_size must have two elements",
+                                crop_size.shape().DebugString()),
+        done);
+
+    // Copy and validate crop sizes.
     auto crop_size_vec = crop_size.vec();
     const int crop_height = internal::SubtleMustCopy(crop_size_vec(0));
     const int crop_width = internal::SubtleMustCopy(crop_size_vec(1));
-    OP_REQUIRES(context, crop_height > 0 && crop_width > 0,
-                errors::InvalidArgument("crop dimensions must be positive"));
+    OP_REQUIRES_ASYNC(
+        context, crop_height > 0 && crop_width > 0,
+        errors::InvalidArgument("crop dimensions must be positive"), done);
 
     // Allocate output tensor.
     Tensor* output = nullptr;
-    OP_REQUIRES_OK(
+    OP_REQUIRES_OK_ASYNC(
         context,
         context->allocate_output(
             0, TensorShape({num_boxes, crop_height, crop_width, depth}),
-            &output));
-
-    typename TTypes::ConstTensor image_data = image.tensor();
-    typename TTypes::ConstTensor boxes_data =
-        boxes.tensor();
-    typename TTypes::ConstTensor box_ind_data =
-        box_ind.tensor();
-    typename TTypes::Tensor crops_data = output->tensor();
-
-    CheckValidBoxInd(context, box_ind_data, batch);
-
-    bool status = functor::CropAndResize()(
-        context, image_data, boxes_data, box_ind_data, extrapolation_value_,
-        crops_data);
-    if (!status) {
-      context->SetStatus(
-          errors::Internal("Failed launch CropAndResizeKernel."));
-    }
+            &output),
+        done);
+
+    auto compute_callback = [this, context, output]() {
+      const Tensor& image = context->input(0);
+      const Tensor& boxes = context->input(1);
+      const Tensor& box_index = context->input(2);
+      const bool status = functor::CropAndResize()(
+          context, image.tensor(), boxes.tensor(),
+          box_index.tensor(), extrapolation_value_,
+          output->tensor());
+      if (!status) {
+        context->SetStatus(
+            errors::Internal("Failed launch CropAndResizeKernel."));
+      }
+    };
+
+    RunIfBoxIndexIsValid(context, box_index.tensor(),
+                                 batch_size, std::move(compute_callback),
+                                 std::move(done));
   }
 
  private:
@@ -156,10 +203,10 @@ struct CropAndResize {
   bool operator()(const OpKernelContext* context,
                   typename TTypes::ConstTensor image,
                   typename TTypes::ConstTensor boxes,
-                  typename TTypes::ConstTensor box_ind,
+                  typename TTypes::ConstTensor box_index,
                   float extrapolation_value,
                   typename TTypes::Tensor crops) {
-    const int batch = image.dimension(0);
+    const int batch_size = image.dimension(0);
     const int image_height = image.dimension(1);
     const int image_width = image.dimension(2);
 
@@ -176,8 +223,8 @@ struct CropAndResize {
         const float y2 = boxes(b, 2);
         const float x2 = boxes(b, 3);
 
-        const int32 b_in = box_ind(b);
-        if (b_in < 0 || b_in >= batch) {
+        const int32 b_in = box_index(b);
+        if (!FastBoundsCheck(b_in, batch_size)) {
           continue;
         }
 
@@ -255,89 +302,94 @@ struct CropAndResize {
     return true;
   }
 };
+
 }  // namespace functor
 
 template 
-class CropAndResizeGradImageOp : public OpKernel {
+class CropAndResizeGradImageOp : public AsyncOpKernel {
  public:
   explicit CropAndResizeGradImageOp(OpKernelConstruction* context)
-      : OpKernel(context) {
+      : AsyncOpKernel(context) {
     string method;
     OP_REQUIRES_OK(context, context->GetAttr("method", &method));
     OP_REQUIRES(context, method == "bilinear",
                 errors::InvalidArgument("method must be 'bilinear'", method));
   }
 
-  void Compute(OpKernelContext* context) override {
+  void ComputeAsync(OpKernelContext* context, DoneCallback done) override {
     // The shape of 'grads' is [num_boxes, crop_height, crop_width, depth].
     const Tensor& grads = context->input(0);
-
-    OP_REQUIRES(context, grads.dims() == 4,
-                errors::InvalidArgument("grads image must be 4-D",
-                                        grads.shape().DebugString()));
-    const int crop_height = grads.dim_size(1);
-    const int crop_width = grads.dim_size(2);
-    OP_REQUIRES(context, crop_height > 0 && crop_width > 0,
-                errors::InvalidArgument("grads dimensions must be positive"));
-
     // The shape of 'boxes' is [num_boxes, 4].
     const Tensor& boxes = context->input(1);
-
-    // The shape of 'box_ind' is [num_boxes].
-    const Tensor& box_ind = context->input(2);
-
-    int num_boxes = 0;
-    ParseAndCheckBoxSizes(context, boxes, box_ind, &num_boxes);
-
-    OP_REQUIRES(
-        context, grads.dim_size(0) == num_boxes,
-        errors::InvalidArgument("boxes and grads have incompatible shape"));
-
+    // The shape of 'box_index' is [num_boxes].
+    const Tensor& box_index = context->input(2);
     // The shape of 'image_size' is [4].
     const Tensor& image_size = context->input(3);
-    OP_REQUIRES(context, image_size.dims() == 1,
-                errors::InvalidArgument("image_size must be 1-D",
-                                        image_size.shape().DebugString()));
-    OP_REQUIRES(context, image_size.dim_size(0) == 4,
-                errors::InvalidArgument("image_size must have 4 elements",
-                                        image_size.shape().DebugString()));
 
+    // Validate input shapes.
+    OP_REQUIRES_ASYNC(context, grads.dims() == 4,
+                      errors::InvalidArgument("grads image must be 4-D",
+                                              grads.shape().DebugString()),
+                      done);
+    const int crop_height = grads.dim_size(1);
+    const int crop_width = grads.dim_size(2);
+    OP_REQUIRES_ASYNC(
+        context, crop_height > 0 && crop_width > 0,
+        errors::InvalidArgument("grads dimensions must be positive"), done);
+    int num_boxes = 0;
+    OP_REQUIRES_OK_ASYNC(
+        context, ParseAndCheckBoxSizes(boxes, box_index, &num_boxes), done);
+    OP_REQUIRES_ASYNC(
+        context, grads.dim_size(0) == num_boxes,
+        errors::InvalidArgument("boxes and grads have incompatible shape"),
+        done);
+
+    OP_REQUIRES_ASYNC(context, image_size.dims() == 1,
+                      errors::InvalidArgument("image_size must be 1-D",
+                                              image_size.shape().DebugString()),
+                      done);
+    OP_REQUIRES_ASYNC(context, image_size.dim_size(0) == 4,
+                      errors::InvalidArgument("image_size must have 4 elements",
+                                              image_size.shape().DebugString()),
+                      done);
     auto image_size_vec = image_size.vec();
-    const int batch = internal::SubtleMustCopy(image_size_vec(0));
+    const int batch_size = internal::SubtleMustCopy(image_size_vec(0));
     const int image_height = internal::SubtleMustCopy(image_size_vec(1));
     const int image_width = internal::SubtleMustCopy(image_size_vec(2));
     const int depth = internal::SubtleMustCopy(image_size_vec(3));
-
-    OP_REQUIRES(context, image_height > 0 && image_width > 0,
-                errors::InvalidArgument("image dimensions must be positive"));
-    OP_REQUIRES(
+    OP_REQUIRES_ASYNC(
+        context, image_height > 0 && image_width > 0,
+        errors::InvalidArgument("image dimensions must be positive"), done);
+    OP_REQUIRES_ASYNC(
         context, grads.dim_size(3) == depth,
-        errors::InvalidArgument("image_size and grads are incompatible"));
+        errors::InvalidArgument("image_size and grads are incompatible"), done);
 
     // Allocate output tensor.
     Tensor* output = nullptr;
-    OP_REQUIRES_OK(
-        context, context->allocate_output(
-                     0, TensorShape({batch, image_height, image_width, depth}),
-                     &output));
-
-    typename TTypes::ConstTensor grads_data =
-        grads.tensor();
-    typename TTypes::ConstTensor boxes_data =
-        boxes.tensor();
-    typename TTypes::ConstTensor box_ind_data =
-        box_ind.tensor();
-    typename TTypes::Tensor output_data = output->tensor();
-
-    CheckValidBoxInd(context, box_ind_data, batch);
-
-    bool status = functor::CropAndResizeBackpropImage()(
-        context->eigen_device(), grads_data, boxes_data, box_ind_data,
-        output_data);
-    if (!status) {
-      context->SetStatus(
-          errors::Internal("Failed launch CropAndResizeBackpropImageKernel."));
-    }
+    OP_REQUIRES_OK_ASYNC(
+        context,
+        context->allocate_output(
+            0, TensorShape({batch_size, image_height, image_width, depth}),
+            &output),
+        done);
+
+    auto compute_callback = [context, output]() {
+      const Tensor& grads = context->input(0);
+      const Tensor& boxes = context->input(1);
+      const Tensor& box_index = context->input(2);
+      const bool status = functor::CropAndResizeBackpropImage()(
+          context->eigen_device(), grads.tensor(),
+          boxes.tensor(), box_index.tensor(),
+          output->tensor());
+      if (!status) {
+        context->SetStatus(errors::Internal(
+            "Failed launch CropAndResizeBackpropImage kernel."));
+      }
+    };
+
+    RunIfBoxIndexIsValid(context, box_index.tensor(),
+                                 batch_size, std::move(compute_callback),
+                                 std::move(done));
   }
 };
 
@@ -348,9 +400,9 @@ struct CropAndResizeBackpropImage {
   bool operator()(const CPUDevice& d,
                   typename TTypes::ConstTensor grads,
                   typename TTypes::ConstTensor boxes,
-                  typename TTypes::ConstTensor box_ind,
+                  typename TTypes::ConstTensor box_index,
                   typename TTypes::Tensor grads_image) {
-    const int batch = grads_image.dimension(0);
+    const int batch_size = grads_image.dimension(0);
     const int image_height = grads_image.dimension(1);
     const int image_width = grads_image.dimension(2);
 
@@ -367,8 +419,8 @@ struct CropAndResizeBackpropImage {
       const float y2 = boxes(b, 2);
       const float x2 = boxes(b, 3);
 
-      const int32 b_in = box_ind(b);
-      if (b_in < 0 || b_in >= batch) {
+      const int32 b_in = box_index(b);
+      if (!FastBoundsCheck(b_in, batch_size)) {
         continue;
       }
 
@@ -419,83 +471,90 @@ struct CropAndResizeBackpropImage {
     return true;
   }
 };
+
 }  // namespace functor
 
 template 
-class CropAndResizeGradBoxesOp : public OpKernel {
+class CropAndResizeGradBoxesOp : public AsyncOpKernel {
  public:
   explicit CropAndResizeGradBoxesOp(OpKernelConstruction* context)
-      : OpKernel(context) {
+      : AsyncOpKernel(context) {
     string method;
     OP_REQUIRES_OK(context, context->GetAttr("method", &method));
     OP_REQUIRES(context, method == "bilinear",
                 errors::InvalidArgument("method must be 'bilinear'", method));
   }
 
-  void Compute(OpKernelContext* context) override {
+  void ComputeAsync(OpKernelContext* context, DoneCallback done) override {
     // The shape of 'grads' is [num_boxes, crop_height, crop_width, depth].
     const Tensor& grads = context->input(0);
+    // The shape of 'boxes' is [num_boxes, 4].
+    const Tensor& boxes = context->input(2);
+    // The shape of 'box_index' is [num_boxes].
+    const Tensor& box_index = context->input(3);
+    // The shape of 'image' is [batch_size, image_height, image_width, depth].
+    const Tensor& image = context->input(1);
 
-    OP_REQUIRES(context, grads.dims() == 4,
-                errors::InvalidArgument("grads image must be 4-D",
-                                        grads.shape().DebugString()));
-
+    // Validate input shapes.
+    OP_REQUIRES_ASYNC(context, grads.dims() == 4,
+                      errors::InvalidArgument("grads image must be 4-D",
+                                              grads.shape().DebugString()),
+                      done);
     const int crop_height = grads.dim_size(1);
     const int crop_width = grads.dim_size(2);
     const int depth = grads.dim_size(3);
-    OP_REQUIRES(context, crop_height > 0 && crop_width > 0,
-                errors::InvalidArgument("grads dimensions must be positive"));
-
-    // The shape of 'image' is [batch, image_height, image_width, depth].
-    const Tensor& image = context->input(1);
-    OP_REQUIRES(context, image.dims() == 4,
-                errors::InvalidArgument("input image must be 4-D",
-                                        image.shape().DebugString()));
-
-    const int batch = image.dim_size(0);
+    OP_REQUIRES_ASYNC(
+        context, crop_height > 0 && crop_width > 0,
+        errors::InvalidArgument("grads dimensions must be positive"), done);
+
+    OP_REQUIRES_ASYNC(context, image.dims() == 4,
+                      errors::InvalidArgument("input image must be 4-D",
+                                              image.shape().DebugString()),
+                      done);
+    const int batch_size = image.dim_size(0);
     const int image_height = image.dim_size(1);
     const int image_width = image.dim_size(2);
-    OP_REQUIRES(context, image_height > 0 && image_width > 0,
-                errors::InvalidArgument("image dimensions must be positive"));
-    OP_REQUIRES(context, image.dim_size(3) == depth,
-                errors::InvalidArgument("image, grads depth differ"));
-
-    // The shape of 'boxes' is [num_boxes, 4].
-    const Tensor& boxes = context->input(2);
-
-    // The shape of 'box_ind' is [num_boxes].
-    const Tensor& box_ind = context->input(3);
+    OP_REQUIRES_ASYNC(
+        context, image_height > 0 && image_width > 0,
+        errors::InvalidArgument("image dimensions must be positive"), done);
+    OP_REQUIRES_ASYNC(context, image.dim_size(3) == depth,
+                      errors::InvalidArgument("image, grads depth differ"),
+                      done);
 
     int num_boxes = 0;
-    ParseAndCheckBoxSizes(context, boxes, box_ind, &num_boxes);
+    OP_REQUIRES_OK_ASYNC(
+        context, ParseAndCheckBoxSizes(boxes, box_index, &num_boxes), done);
 
-    OP_REQUIRES(
+    OP_REQUIRES_ASYNC(
         context, grads.dim_size(0) == num_boxes,
-        errors::InvalidArgument("boxes and grads have incompatible shape"));
+        errors::InvalidArgument("boxes and grads have incompatible shape"),
+        done);
 
     // Allocate output tensor.
     Tensor* output = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(
-                                0, TensorShape({num_boxes, 4}), &output));
-
-    typename TTypes::ConstTensor grads_data =
-        grads.tensor();
-    typename TTypes::ConstTensor image_data = image.tensor();
-    typename TTypes::ConstTensor boxes_data =
-        boxes.tensor();
-    typename TTypes::ConstTensor box_ind_data =
-        box_ind.tensor();
-    typename TTypes::Tensor output_data = output->tensor();
-
-    CheckValidBoxInd(context, box_ind_data, batch);
-
-    bool status = functor::CropAndResizeBackpropBoxes()(
-        context->eigen_device(), grads_data, image_data, boxes_data,
-        box_ind_data, output_data);
-    if (!status) {
-      context->SetStatus(
-          errors::Internal("Failed launch CropAndResizeBackpropBoxesKernel."));
-    }
+    OP_REQUIRES_OK_ASYNC(
+        context,
+        context->allocate_output(0, TensorShape({num_boxes, 4}), &output),
+        done);
+
+    auto compute_callback = [context, output]() {
+      const Tensor& grads = context->input(0);
+      const Tensor& image = context->input(1);
+      const Tensor& boxes = context->input(2);
+      const Tensor& box_index = context->input(3);
+      const bool status = functor::CropAndResizeBackpropBoxes()(
+          context->eigen_device(), grads.tensor(),
+          image.tensor(), boxes.tensor(),
+          box_index.tensor(), output->tensor());
+      if (!status) {
+        context->SetStatus(errors::Internal(
+            "Failed launch CropAndResizeBackpropBoxes kernel."));
+      }
+    };
+
+    RunIfBoxIndexIsValid(context, box_index.tensor(),
+                                 batch_size, std::move(compute_callback),
+                                 std::move(done));
   }
 };
 
@@ -507,9 +566,9 @@ struct CropAndResizeBackpropBoxes {
                   typename TTypes::ConstTensor grads,
                   typename TTypes::ConstTensor image,
                   typename TTypes::ConstTensor boxes,
-                  typename TTypes::ConstTensor box_ind,
+                  typename TTypes::ConstTensor box_index,
                   typename TTypes::Tensor grads_boxes) {
-    const int batch = image.dimension(0);
+    const int batch_size = image.dimension(0);
     const int image_height = image.dimension(1);
     const int image_width = image.dimension(2);
 
@@ -526,8 +585,8 @@ struct CropAndResizeBackpropBoxes {
       const float y2 = boxes(b, 2);
       const float x2 = boxes(b, 3);
 
-      const int32 b_in = box_ind(b);
-      if (b_in < 0 || b_in >= batch) {
+      const int32 b_in = box_index(b);
+      if (!FastBoundsCheck(b_in, batch_size)) {
         continue;
       }
 
@@ -609,30 +668,19 @@ struct CropAndResizeBackpropBoxes {
     return true;
   }
 };
-}  // namespace functor
 
-// Specialization of CheckValidBoxInd for a CPUDevice.
-template <>
-inline void CheckValidBoxInd(
-    OpKernelContext* context, typename TTypes::ConstTensor box_ind,
-    int batch) {
-  const int num_boxes = box_ind.dimension(0);
-  for (int b = 0; b < num_boxes; ++b) {
-    OP_REQUIRES(context, box_ind(b) >= 0 && box_ind(b) < batch,
-                errors::OutOfRange("box_ind has values outside [0, batch)"));
-  }
-}
+}  // namespace functor
 
-#define REGISTER_KERNEL(T)                                         \
-  REGISTER_KERNEL_BUILDER(Name("CropAndResize")                    \
-                              .Device(DEVICE_CPU)                  \
-                              .TypeConstraint("T")              \
-                              .HostMemory("crop_size"),            \
-                          CropAndResizeOp);          \
-                                                                   \
-  REGISTER_KERNEL_BUILDER(Name("CropAndResizeGradBoxes")           \
-                              .Device(DEVICE_CPU)                  \
-                              .TypeConstraint("T"),             \
+#define REGISTER_KERNEL(T)                                \
+  REGISTER_KERNEL_BUILDER(Name("CropAndResize")           \
+                              .Device(DEVICE_CPU)         \
+                              .TypeConstraint("T")     \
+                              .HostMemory("crop_size"),   \
+                          CropAndResizeOp); \
+                                                          \
+  REGISTER_KERNEL_BUILDER(Name("CropAndResizeGradBoxes")  \
+                              .Device(DEVICE_CPU)         \
+                              .TypeConstraint("T"),    \
                           CropAndResizeGradBoxesOp);
 
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);
@@ -654,50 +702,93 @@ TF_CALL_double(REGISTER_KERNEL);
 
 #if GOOGLE_CUDA
 
-// Forward declaration of the CheckValidBoxIndHelper specialization for GPU.
+// Forward declaration of the CheckValidBoxIndexHelper specialization for GPU.
 namespace functor {
 template <>
-void CheckValidBoxIndHelper::operator()(
-    const GPUDevice& d, typename TTypes::ConstTensor box_ind,
-    int batch, typename TTypes::Tensor isvalid);
-extern template struct CheckValidBoxIndHelper;
+void CheckValidBoxIndexHelper::operator()(
+    const GPUDevice& d, typename TTypes::ConstTensor box_index,
+    int batch_size, typename TTypes::Tensor isvalid);
+extern template struct CheckValidBoxIndexHelper;
 }  // namespace functor
 
-// Specialization of CheckValidBoxInd for a GPUDevice.
+namespace {
+
+// Specialization of CheckValidBoxIndex for a GPUDevice.
 template <>
-inline void CheckValidBoxInd(
-    OpKernelContext* context, typename TTypes::ConstTensor box_ind,
-    int batch) {
-  const int num_boxes = box_ind.dimension(0);
+inline void RunIfBoxIndexIsValid(
+    OpKernelContext* context, typename TTypes::ConstTensor box_index,
+    int batch_size, const Callback& compute, const Callback& done) {
+  const int num_boxes = box_index.dimension(0);
   if (num_boxes == 0) {
+    compute();
+    done();
     return;
   }
-  Tensor isvalid_tensor;
-  OP_REQUIRES_OK(context,
-                 context->allocate_temp(DataTypeToEnum::value,
-                                        TensorShape({}), &isvalid_tensor));
 
-  typename TTypes::Tensor isvalid = isvalid_tensor.tensor();
+  Tensor isvalid_dev_tensor;
+  OP_REQUIRES_OK_ASYNC(
+      context,
+      context->allocate_temp(DataTypeToEnum::value, TensorShape({}),
+                             &isvalid_dev_tensor),
+      done);
+  typename TTypes::Tensor isvalid_dev =
+      isvalid_dev_tensor.tensor();
 
-  functor::CheckValidBoxIndHelper()(
-      context->eigen_device(), box_ind, batch, isvalid);
+  // Run the actual box check on the device.
+  functor::CheckValidBoxIndexHelper()(
+      context->eigen_device(), box_index, batch_size, isvalid_dev);
 
+  // Copy the result back to the host.
   auto* stream = context->op_device_context()->stream();
-  OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
-
-  bool isvalid_host = false;
-  perftools::gputools::DeviceMemoryBase isvalid_gpu(isvalid.data(),
-                                                    sizeof(bool));
-  stream->ThenMemcpy(&isvalid_host, isvalid_gpu, sizeof(bool));
-  stream->BlockHostUntilDone();
-
-  OP_REQUIRES(context, stream->ok(),
-              errors::Internal("cudaMemcpy from device to host failed"));
-
-  OP_REQUIRES(context, isvalid_host,
-              errors::OutOfRange("box_ind has values outside [0, batch)"));
+  OP_REQUIRES_ASYNC(context, stream,
+                    errors::Internal("No GPU stream available."), done);
+  Tensor isvalid_host_tensor;
+  // Use pinned host memory on the host to avoid unnecessary
+  // synchronization.
+  AllocatorAttributes alloc_attr;
+  alloc_attr.set_on_host(true);
+  alloc_attr.set_gpu_compatible(true);
+  OP_REQUIRES_OK_ASYNC(
+      context,
+      context->allocate_temp(DataTypeToEnum::value, TensorShape({}),
+                             &isvalid_host_tensor, alloc_attr),
+      done);
+  perftools::gputools::DeviceMemoryBase wrapped(isvalid_dev.data(),
+                                                sizeof(bool));
+  const bool status =
+      stream
+          ->ThenMemcpy(
+              isvalid_host_tensor.scalar().data() /* destination */,
+              wrapped /* source */, sizeof(bool))
+          .ok();
+  OP_REQUIRES_ASYNC(
+      context, status,
+      errors::Internal("Failed to launch copy of isvalid from device to host."),
+      done);
+
+  // We capture both temporary tensors to prevent them from being deallocated
+  // when ComputeAsync returns and before the closure runs.
+  TensorReference isvalid_dev_ref(isvalid_dev_tensor);
+  auto wrapped_callback = [context, isvalid_host_tensor, isvalid_dev_ref,
+                           compute, done]() {
+    auto stream = context->op_device_context()->stream();
+    ScopedActivateExecutorContext scoped_activation{stream->parent()};
+    const bool isvalid = isvalid_host_tensor.scalar()();
+    isvalid_dev_ref.Unref();
+    OP_REQUIRES_ASYNC(
+        context, isvalid,
+        errors::OutOfRange("box_index has values outside [0, batch_size)"),
+        done);
+    compute();
+    done();
+  };
+
+  context->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute(
+      stream, wrapped_callback);
 }
 
+}  // namespace
+
 #define REGISTER_KERNEL(T)                                         \
   REGISTER_KERNEL_BUILDER(Name("CropAndResize")                    \
                               .Device(DEVICE_GPU)                  \
diff --git a/tensorflow/core/kernels/crop_and_resize_op.h b/tensorflow/core/kernels/crop_and_resize_op.h
index 84d7a5e03b..b6b1dbd7b0 100644
--- a/tensorflow/core/kernels/crop_and_resize_op.h
+++ b/tensorflow/core/kernels/crop_and_resize_op.h
@@ -55,12 +55,12 @@ struct CropAndResizeBackpropBoxes {
 };
 
 template 
-struct CheckValidBoxIndHelper {
-  // Checks if all values in box_ind are in [0, batch).
+struct CheckValidBoxIndexHelper {
+  // Checks if all values in box_index are in [0, batch).
   void operator()(const Device& d,
-                  typename TTypes::ConstTensor box_ind, int batch,
+                  typename TTypes::ConstTensor box_index, int batch,
                   typename TTypes::Tensor isvalid) {
-    isvalid.device(d) = ((box_ind >= 0) && (box_ind < batch)).all();
+    isvalid.device(d) = ((box_index >= 0) && (box_index < batch)).all();
   }
 };
 
diff --git a/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc b/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc
index 1726e4a816..d12787d524 100644
--- a/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc
@@ -442,7 +442,7 @@ TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS);
 
 #undef DEFINE_GPU_SPECS
 
-template struct CheckValidBoxIndHelper;
+template struct CheckValidBoxIndexHelper;
 
 }  // namespace functor
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/crop_and_resize_op_test.cc b/tensorflow/core/kernels/crop_and_resize_op_test.cc
index 1bf28d4d00..22c659b587 100644
--- a/tensorflow/core/kernels/crop_and_resize_op_test.cc
+++ b/tensorflow/core/kernels/crop_and_resize_op_test.cc
@@ -251,7 +251,7 @@ TEST_F(CropAndResizeOpTest, TestInvalidBoxIndexShape) {
   Status s = RunOpKernel();
   ASSERT_FALSE(s.ok());
   EXPECT_TRUE(
-      StringPiece(s.ToString()).contains("box_ind has incompatible shape"))
+      StringPiece(s.ToString()).contains("box_index has incompatible shape"))
       << s;
 }
 
@@ -264,7 +264,7 @@ TEST_F(CropAndResizeOpTest, TestInvalidBoxIndex) {
   Status s = RunOpKernel();
   ASSERT_FALSE(s.ok());
   EXPECT_TRUE(StringPiece(s.ToString())
-                  .contains("box_ind has values outside [0, batch)"))
+                  .contains("box_index has values outside [0, batch_size)"))
       << s;
 }
 
diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/kernels/cuda_solvers.h
index ac6119d8a2..0fd6450f98 100644
--- a/tensorflow/core/kernels/cuda_solvers.h
+++ b/tensorflow/core/kernels/cuda_solvers.h
@@ -313,6 +313,9 @@ class ScratchSpace {
   int64 size() const { return scratch_tensor_.NumElements(); }
   const string& debug_info() const { return debug_info_; }
 
+  Tensor& tensor() { return scratch_tensor_; }
+  const Tensor& tensor() const { return scratch_tensor_; }
+
   // Returns true if this ScratchSpace is in host memory.
   bool on_host() const { return on_host_; }
 
diff --git a/tensorflow/core/kernels/cuda_solvers_gpu.cu.cc b/tensorflow/core/kernels/cuda_solvers_gpu.cu.cc
index b9e42b4d00..af6c094d7a 100644
--- a/tensorflow/core/kernels/cuda_solvers_gpu.cu.cc
+++ b/tensorflow/core/kernels/cuda_solvers_gpu.cu.cc
@@ -51,55 +51,57 @@ namespace {
 
 // Hacks around missing support for complex arithmetic in nvcc.
 template 
-__host__ __device__ inline Scalar Multiply(Scalar x, Scalar y) {
+__device__ inline Scalar Multiply(Scalar x, Scalar y) {
   return x * y;
 }
 
 template <>
-__host__ __device__ inline cuComplex Multiply(cuComplex x, cuComplex y) {
+__device__ inline cuComplex Multiply(cuComplex x, cuComplex y) {
   return cuCmulf(x, y);
 }
 
 template <>
-__host__ __device__ inline cuDoubleComplex Multiply(cuDoubleComplex x,
-                                                    cuDoubleComplex y) {
+__device__ inline cuDoubleComplex Multiply(cuDoubleComplex x,
+                                           cuDoubleComplex y) {
   return cuCmul(x, y);
 }
 
 template 
-__host__ __device__ inline Scalar Negate(Scalar x) {
+__device__ inline Scalar Negate(Scalar x) {
   return -x;
 }
 
 template <>
-__host__ __device__ inline cuComplex Negate(cuComplex x) {
+__device__ inline cuComplex Negate(cuComplex x) {
   return make_cuComplex(-cuCrealf(x), -cuCimagf(x));
 }
 
 template <>
-__host__ __device__ inline cuDoubleComplex Negate(cuDoubleComplex x) {
+__device__ inline cuDoubleComplex Negate(cuDoubleComplex x) {
   return make_cuDoubleComplex(-cuCreal(x), -cuCimag(x));
 }
 
 template 
-__host__ __device__ inline bool IsFinite(Scalar x) {
-  return isfinite(x);
+__device__ inline bool IsFinite(Scalar x) {
+  return Eigen::numext::isfinite(x);
 }
 
 template <>
-__host__ __device__ inline bool IsFinite(cuComplex x) {
-  return isfinite(cuCrealf(x)) && isfinite(cuCimagf(x));
+__device__ inline bool IsFinite(cuComplex x) {
+  return Eigen::numext::isfinite(cuCrealf(x)) &&
+         Eigen::numext::isfinite(cuCimagf(x));
 }
 
 template <>
-__host__ __device__ inline bool IsFinite(cuDoubleComplex x) {
-  return isfinite(cuCreal(x)) && isfinite(cuCimag(x));
+__device__ inline bool IsFinite(cuDoubleComplex x) {
+  return Eigen::numext::isfinite(cuCreal(x)) &&
+         Eigen::numext::isfinite(cuCimag(x));
 }
 
 template 
 struct Const {
   template 
-  __host__ __device__ static inline Scalar make_const(const RealScalar x) {
+  __device__ static inline Scalar make_const(const RealScalar x) {
     return Scalar(x);
   }
 };
@@ -107,7 +109,7 @@ struct Const {
 template <>
 struct Const {
   template 
-  __host__ __device__ static inline cuComplex make_const(const RealScalar x) {
+  __device__ static inline cuComplex make_const(const RealScalar x) {
     return make_cuComplex(x, 0.0f);
   }
 };
@@ -115,8 +117,7 @@ struct Const {
 template <>
 struct Const {
   template 
-  __host__ __device__ static inline cuDoubleComplex make_const(
-      const RealScalar x) {
+  __device__ static inline cuDoubleComplex make_const(const RealScalar x) {
     return make_cuDoubleComplex(x, 0.0f);
   }
 };
diff --git a/tensorflow/core/kernels/dataset_utils.cc b/tensorflow/core/kernels/dataset_utils.cc
new file mode 100644
index 0000000000..f320b3b09c
--- /dev/null
+++ b/tensorflow/core/kernels/dataset_utils.cc
@@ -0,0 +1,78 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/dataset_utils.h"
+
+namespace tensorflow {
+
+namespace dataset {
+
+Status MakeIteratorFromInputElement(
+    IteratorContext* ctx, const std::vector& input_element,
+    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
+    std::unique_ptr* out_iterator) {
+  FunctionLibraryRuntime::Options opts;
+  opts.runner = ctx->runner();
+  // Choose a step ID that is guaranteed not to clash with any
+  // Session-generated step ID. DirectSession only generates
+  // non-negative step IDs (contiguous, starting from 0), and
+  // MasterSession generates 56-bit random step IDs whose MSB
+  // is always 0, so a negative random step ID should suffice.
+  opts.step_id = CapturedFunction::generate_step_id();
+  ScopedStepContainer step_container(
+      opts.step_id, [captured_func, ctx](const string& name) {
+        captured_func->resource_manager()->Cleanup(name).IgnoreError();
+      });
+  opts.step_container = &step_container;
+  std::vector return_values;
+  TF_RETURN_IF_ERROR(captured_func->Run(opts, input_element, &return_values));
+
+  if (!(return_values.size() == 1 && return_values[0].dtype() == DT_RESOURCE &&
+        TensorShapeUtils::IsScalar(return_values[0].shape()))) {
+    return errors::InvalidArgument(
+        "Function must return a single scalar of dtype DT_RESOURCE.");
+  }
+
+  // Retrieve the dataset that was created in `f`.
+  DatasetBase* returned_dataset;
+  const ResourceHandle& dataset_resource =
+      return_values[0].scalar()();
+
+  // NOTE(mrry): We cannot use the core `LookupResource()` or
+  // `DeleteResource()` functions, because we have an
+  // `IteratorContext*` and not an `OpKernelContext*`, so we
+  // replicate the necessary functionality here.
+  auto type_index = MakeTypeIndex();
+  if (type_index.hash_code() != dataset_resource.hash_code()) {
+    return errors::InvalidArgument("Function must return a Dataset resource.");
+  }
+  TF_RETURN_IF_ERROR(captured_func->resource_manager()->Lookup(
+      dataset_resource.container(), dataset_resource.name(),
+      &returned_dataset));
+  core::ScopedUnref unref_dataset(returned_dataset);
+
+  // Create an iterator for the dataset that was returned by
+  // `f`. This transfers ownership of the dataset to the
+  // iterator, so we can delete it from the resource manager.
+  *out_iterator = returned_dataset->MakeIterator(
+      strings::StrCat(prefix, "[", thread_index, "]"));
+  TF_RETURN_IF_ERROR(captured_func->resource_manager()->Delete(
+      dataset_resource.container(), dataset_resource.name()));
+  return Status::OK();
+}
+
+}  // namespace dataset
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/dataset_utils.h b/tensorflow/core/kernels/dataset_utils.h
new file mode 100644
index 0000000000..eea2b8802b
--- /dev/null
+++ b/tensorflow/core/kernels/dataset_utils.h
@@ -0,0 +1,35 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_UTILS_H_
+
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/captured_function.h"
+#include "tensorflow/core/kernels/dataset.h"
+
+namespace tensorflow {
+
+namespace dataset {
+
+Status MakeIteratorFromInputElement(
+    IteratorContext* ctx, const std::vector& input_element,
+    int64 thread_index, CapturedFunction* captured_func, StringPiece prefix,
+    std::unique_ptr* out_iterator);
+
+}  // namespace dataset
+
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_DATASET_UTILS_H_
diff --git a/tensorflow/core/kernels/flat_map_dataset_op.cc b/tensorflow/core/kernels/flat_map_dataset_op.cc
index e2310fecc7..a87e54bf31 100644
--- a/tensorflow/core/kernels/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/flat_map_dataset_op.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/lib/random/random.h"
 
 #include "tensorflow/core/kernels/captured_function.h"
+#include "tensorflow/core/kernels/dataset_utils.h"
 
 namespace tensorflow {
 
@@ -125,58 +126,9 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
             return Status::OK();
           }
 
-          FunctionLibraryRuntime::Options opts;
-          opts.runner = ctx->runner();
-          opts.step_id = CapturedFunction::generate_step_id();
-          ScopedStepContainer step_container(
-              opts.step_id, [this, ctx](const string& name) {
-                dataset()
-                    ->captured_func_->resource_manager()
-                    ->Cleanup(name)
-                    .IgnoreError();
-              });
-          opts.step_container = &step_container;
-          std::vector return_values;
-          TF_RETURN_IF_ERROR(
-              dataset()->captured_func_->Run(opts, args, &return_values));
-
-          if (!(return_values.size() == 1 &&
-                return_values[0].dtype() == DT_RESOURCE &&
-                TensorShapeUtils::IsScalar(return_values[0].shape()))) {
-            return errors::InvalidArgument(
-                "`f` must return a single scalar of dtype DT_RESOURCE.");
-          }
-
-          // Retrieve the dataset that was created in `f`.
-          DatasetBase* returned_dataset;
-          const ResourceHandle& dataset_resource =
-              return_values[0].scalar()();
-
-          // NOTE(mrry): We cannot use the core `LookupResource()` or
-          // `DeleteResource()` functions, because we have an
-          // `IteratorContext*` and not an `OpKernelContext*`, so we
-          // replicate the necessary functionality here.
-          auto type_index = MakeTypeIndex();
-          if (type_index.hash_code() != dataset_resource.hash_code()) {
-            return errors::InvalidArgument(
-                "`f` must return a Dataset resource.");
-          }
-          TF_RETURN_IF_ERROR(
-              dataset()->captured_func_->resource_manager()->Lookup(
-                  dataset_resource.container(), dataset_resource.name(),
-                  &returned_dataset));
-          core::ScopedUnref unref_dataset(returned_dataset);
-
-          // Create an iterator for the dataset that was returned by
-          // `f`. This transfers ownership of the dataset to the
-          // iterator, so we can delete it from the resource manager.
-          current_element_iterator_ = returned_dataset->MakeIterator(
-              strings::StrCat(prefix(), "[", element_index_++, "]"));
-          TF_RETURN_IF_ERROR(
-              dataset()
-                  ->captured_func_->resource_manager()
-                  ->Delete(dataset_resource.container(),
-                                        dataset_resource.name()));
+          TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement(
+              ctx, args, element_index_++, dataset()->captured_func_.get(),
+              prefix(), ¤t_element_iterator_));
         } while (true);
       }
 
diff --git a/tensorflow/core/kernels/interleave_dataset_op.cc b/tensorflow/core/kernels/interleave_dataset_op.cc
index dce4f88101..7b148b74c9 100644
--- a/tensorflow/core/kernels/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/interleave_dataset_op.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/lib/random/random.h"
 
 #include "tensorflow/core/kernels/captured_function.h"
+#include "tensorflow/core/kernels/dataset_utils.h"
 
 namespace tensorflow {
 
@@ -168,8 +169,9 @@ class InterleaveDatasetOp : public OpKernel {
             TF_RETURN_IF_ERROR(
                 input_impl_->GetNext(ctx, &args, &end_of_input_));
             if (!end_of_input_) {
-              TF_RETURN_IF_ERROR(MakeIteratorFromInputElement(
-                  ctx, args, ¤t_elements_[cycle_index_]));
+              TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement(
+                  ctx, args, cycle_index_, dataset()->captured_func_.get(),
+                  prefix(), ¤t_elements_[cycle_index_]));
               ++num_open_;
             }
           } else {
@@ -182,62 +184,6 @@ class InterleaveDatasetOp : public OpKernel {
       }
 
      private:
-      Status MakeIteratorFromInputElement(
-          IteratorContext* ctx, const std::vector& input_element,
-          std::unique_ptr* out_iterator)
-          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        FunctionLibraryRuntime::Options opts;
-        opts.runner = ctx->runner();
-        opts.step_id = CapturedFunction::generate_step_id();
-        ScopedStepContainer step_container(
-            opts.step_id, [this, ctx](const string& name) {
-              dataset()
-                  ->captured_func_->resource_manager()
-                  ->Cleanup(name)
-                  .IgnoreError();
-            });
-        opts.step_container = &step_container;
-        std::vector return_values;
-        TF_RETURN_IF_ERROR(dataset()->captured_func_->Run(opts, input_element,
-                                                          &return_values));
-
-        if (!(return_values.size() == 1 &&
-              return_values[0].dtype() == DT_RESOURCE &&
-              TensorShapeUtils::IsScalar(return_values[0].shape()))) {
-          return errors::InvalidArgument(
-              "`f` must return a single scalar of dtype DT_RESOURCE.");
-        }
-
-        // Retrieve the dataset that was created in `f`.
-        DatasetBase* returned_dataset;
-        const ResourceHandle& dataset_resource =
-            return_values[0].scalar()();
-
-        // NOTE(mrry): We cannot use the core `LookupResource()` or
-        // `DeleteResource()` functions, because we have an
-        // `IteratorContext*` and not an `OpKernelContext*`, so we
-        // replicate the necessary functionality here.
-        auto type_index = MakeTypeIndex();
-        if (type_index.hash_code() != dataset_resource.hash_code()) {
-          return errors::InvalidArgument("`f` must return a Dataset resource.");
-        }
-        TF_RETURN_IF_ERROR(
-            dataset()->captured_func_->resource_manager()->Lookup(
-                dataset_resource.container(), dataset_resource.name(),
-                &returned_dataset));
-        core::ScopedUnref unref_dataset(returned_dataset);
-
-        // Create an iterator for the dataset that was returned by
-        // `f`. This transfers ownership of the dataset to the
-        // iterator, so we can delete it from the resource manager.
-        *out_iterator = returned_dataset->MakeIterator(
-            strings::StrCat(prefix(), "[", cycle_index_, "]"));
-        TF_RETURN_IF_ERROR(
-            dataset()->captured_func_->resource_manager()->Delete(
-                dataset_resource.container(), dataset_resource.name()));
-        return Status::OK();
-      }
-
       mutex mu_;
       const std::unique_ptr input_impl_ GUARDED_BY(mu_);
       std::vector> current_elements_
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index 50700c8bc8..00884d0981 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -98,11 +98,11 @@ class MklConv2DCustomBackpropInputOp : public OpKernel {
                       "Conv2DCustomBackpropInput: size must be 4-dim"));
 
       const int64* filter_sizes =
-        (const int64*) mkl_context.filter_shape.GetSizes();
+          (const int64*)mkl_context.filter_shape.GetSizes();
       const int64 filter_dims = mkl_context.filter_shape.GetDimension();
 
-      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(filter_sizes,
-        filter_dims, &filter_shape));
+      OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
+                                  filter_sizes, filter_dims, &filter_shape));
     } else {
       filter_shape = filter.shape();
     }
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index b50a6343ba..7099aa1307 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -270,22 +270,22 @@ class MklConv2DOp : public OpKernel {
     MklShape mkl_filter_output_mkl_shape;
     mkl_filter_output_mkl_shape.SetMklTensor(true);
     mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd,
-        dnnResourceFilter);
+                                             dnnResourceFilter);
 
     size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1),
-        filter.dim_size(2), filter.dim_size(3)};
+                              filter.dim_size(2), filter.dim_size(3)};
     mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes,
-    mkl_context.filter_strides);
+                                            mkl_context.filter_strides);
 
     mkl_filter_output_mkl_shape.SetTfDimOrder(mkl_context.filter_dims,
-        data_format_);
+                                              data_format_);
     mkl_filter_output_tf_shape.AddDim(
-        dnnLayoutGetMemorySize_F32(
-            static_cast(
-                mkl_filter_output_mkl_shape.GetMklLayout())) /
-                    sizeof(T));
+        dnnLayoutGetMemorySize_F32(static_cast(
+            mkl_filter_output_mkl_shape.GetMklLayout())) /
+        sizeof(T));
     AllocateOutputSetMklShape(context, 1, &mkl_context.output_filter,
-        mkl_filter_output_tf_shape, mkl_filter_output_mkl_shape);
+                              mkl_filter_output_tf_shape,
+                              mkl_filter_output_mkl_shape);
 
     mkl_context.conv_res[dnnResourceDst] =
         static_cast(output->flat().data());
@@ -406,8 +406,13 @@ class MklConv2DOp : public OpKernel {
         CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_filter, lt_filter,
                                          mkl_lt_internal_filter),
                  E_SUCCESS);
+<<<<<<< HEAD
         mkl_buf_convert_filter = const_cast(static_cast(
             output_filter->flat().data()));
+=======
+        mkl_buf_convert_filter = const_cast(
+            static_cast(output_filter->flat().data()));
+>>>>>>> e722358e7e96dd2aa20d7e2c56336e76845daa6a
         CHECK_EQ(
             dnnConversionExecute_F32(mkl_prim_convert_filter, mkl_buf_filter,
                                      mkl_buf_convert_filter),
diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc
index 03c3fb09a1..5e98582475 100644
--- a/tensorflow/core/kernels/mkl_reshape_op.cc
+++ b/tensorflow/core/kernels/mkl_reshape_op.cc
@@ -128,6 +128,7 @@ class MklReshapeOp : public OpKernel {
       CopyTfTensorInToOutWithShape(context, 0, 0, shape);
     }
   }
+
  private:
   template 
   Status ValidateSizes(const Tensor& sizes, int64* product, int* unknown_index,
diff --git a/tensorflow/core/kernels/parse_tensor_op.cc b/tensorflow/core/kernels/parse_tensor_op.cc
index dd645262d2..ab91a6ef67 100644
--- a/tensorflow/core/kernels/parse_tensor_op.cc
+++ b/tensorflow/core/kernels/parse_tensor_op.cc
@@ -16,6 +16,7 @@ limitations under the License.
 // See docs in ../ops/parsing_ops.cc.
 
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.h"
@@ -66,7 +67,6 @@ class ParseTensorOp : public OpKernel {
 
 REGISTER_KERNEL_BUILDER(Name("ParseTensor").Device(DEVICE_CPU), ParseTensorOp);
 
-
 template 
 class SerializeTensorOp : public OpKernel {
  public:
@@ -81,14 +81,14 @@ class SerializeTensorOp : public OpKernel {
       tensor.AsProtoTensorContent(&proto);
     }
     Tensor* proto_string = nullptr;
-    OP_REQUIRES_OK(
-        context, context->allocate_output(0, TensorShape({}), &proto_string));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape({}), &proto_string));
     CHECK(proto.SerializeToString(&proto_string->scalar()()));
   }
 };
 
-#define REGISTER(T) \
-  REGISTER_KERNEL_BUILDER( \
+#define REGISTER(T)                                                      \
+  REGISTER_KERNEL_BUILDER(                                               \
       Name("SerializeTensor").Device(DEVICE_CPU).TypeConstraint("T"), \
       SerializeTensorOp);
 TF_CALL_ALL_TYPES(REGISTER)
diff --git a/tensorflow/core/kernels/parse_tensor_test.cc b/tensorflow/core/kernels/parse_tensor_test.cc
index f6f60fee71..4a5fc07935 100644
--- a/tensorflow/core/kernels/parse_tensor_test.cc
+++ b/tensorflow/core/kernels/parse_tensor_test.cc
@@ -14,8 +14,8 @@ limitations under the License.
 ==============================================================================*/
 
 #include 
-#include 
 #include 
+#include 
 
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_factory.h"
@@ -33,27 +33,23 @@ namespace {
 class SerializeTensorOpTest : public OpsTestBase {
  protected:
   template 
-  void MakeOp(const TensorShape& input_shape,
-              std::function functor) {
-    TF_ASSERT_OK(
-        NodeDefBuilder("myop", "SerializeTensor")
-            .Input(FakeInput(DataTypeToEnum::value))
-            .Finalize(node_def()));
+  void MakeOp(const TensorShape& input_shape, std::function functor) {
+    TF_ASSERT_OK(NodeDefBuilder("myop", "SerializeTensor")
+                     .Input(FakeInput(DataTypeToEnum::value))
+                     .Finalize(node_def()));
     TF_ASSERT_OK(InitOp());
     AddInput(input_shape, functor);
   }
   void ParseSerializedWithNodeDef(const NodeDef& parse_node_def,
-                                    Tensor* serialized,
-                                    Tensor* parse_output) {
+                                  Tensor* serialized, Tensor* parse_output) {
     std::unique_ptr device(
         DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0"));
     gtl::InlinedVector inputs;
     inputs.push_back({nullptr, serialized});
     Status status;
-    std::unique_ptr op(
-        CreateOpKernel(DEVICE_CPU, device.get(),
-                       cpu_allocator(), parse_node_def,
-                       TF_GRAPH_DEF_VERSION, &status));
+    std::unique_ptr op(CreateOpKernel(DEVICE_CPU, device.get(),
+                                                cpu_allocator(), parse_node_def,
+                                                TF_GRAPH_DEF_VERSION, &status));
     TF_EXPECT_OK(status);
     OpKernelContext::Params params;
     params.device = device.get();
@@ -80,8 +76,8 @@ class SerializeTensorOpTest : public OpsTestBase {
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_half) {
   MakeOp(TensorShape({10}), [](int x) -> Eigen::half {
-      return static_cast(x / 10.);
-    });
+    return static_cast(x / 10.);
+  });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -89,9 +85,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_half) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_float) {
-  MakeOp(TensorShape({1, 10}), [](int x) -> float {
-      return static_cast(x / 10.);
-    });
+  MakeOp(TensorShape({1, 10}),
+                [](int x) -> float { return static_cast(x / 10.); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -99,9 +94,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_float) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_double) {
-  MakeOp(TensorShape({5, 5}), [](int x) -> double {
-      return static_cast(x / 10.);
-    });
+  MakeOp(TensorShape({5, 5}),
+                 [](int x) -> double { return static_cast(x / 10.); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -109,9 +103,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_double) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int64) {
-  MakeOp(TensorShape({2, 3, 4}), [](int x) -> int64 {
-      return static_cast(x - 10);
-    });
+  MakeOp(TensorShape({2, 3, 4}),
+                [](int x) -> int64 { return static_cast(x - 10); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -119,9 +112,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int64) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int32) {
-  MakeOp(TensorShape({4, 2}), [](int x) -> int32 {
-      return static_cast(x + 7);
-    });
+  MakeOp(TensorShape({4, 2}),
+                [](int x) -> int32 { return static_cast(x + 7); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -129,9 +121,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int32) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int16) {
-  MakeOp(TensorShape({8}), [](int x) -> int16 {
-      return static_cast(x + 18);
-    });
+  MakeOp(TensorShape({8}),
+                [](int x) -> int16 { return static_cast(x + 18); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -139,9 +130,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int16) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int8) {
-  MakeOp(TensorShape({2}), [](int x) -> int8 {
-      return static_cast(x + 8);
-    });
+  MakeOp(TensorShape({2}),
+               [](int x) -> int8 { return static_cast(x + 8); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -149,9 +139,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int8) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint16) {
-  MakeOp(TensorShape({1, 3}), [](int x) -> uint16 {
-      return static_cast(x + 2);
-    });
+  MakeOp(TensorShape({1, 3}),
+                 [](int x) -> uint16 { return static_cast(x + 2); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -159,9 +148,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint16) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint8) {
-  MakeOp(TensorShape({2, 1, 1}), [](int x) -> uint8 {
-      return static_cast(x + 1);
-    });
+  MakeOp(TensorShape({2, 1, 1}),
+                [](int x) -> uint8 { return static_cast(x + 1); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -170,9 +158,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint8) {
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_complex64) {
   MakeOp(TensorShape({}), [](int x) -> complex64 {
-      return complex64{ static_cast(x / 8.),
-            static_cast(x / 2.) };
-    });
+    return complex64{static_cast(x / 8.), static_cast(x / 2.)};
+  });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -181,8 +168,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_complex64) {
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_complex128) {
   MakeOp(TensorShape({3}), [](int x) -> complex128 {
-      return complex128{ x / 3., x / 2. };
-    });
+    return complex128{x / 3., x / 2.};
+  });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -190,9 +177,8 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_complex128) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_bool) {
-  MakeOp(TensorShape({1}), [](int x) -> bool {
-      return static_cast(x % 2);
-    });
+  MakeOp(TensorShape({1}),
+               [](int x) -> bool { return static_cast(x % 2); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
   ParseSerializedOutput(GetOutput(0), &parse_output);
@@ -200,13 +186,12 @@ TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_bool) {
 }
 
 TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_string) {
-  MakeOp(TensorShape({10}), [](int x) -> std::string {
-      return std::to_string(x / 10.);
-    });
+  MakeOp(TensorShape({10}),
+                 [](int x) -> string { return std::to_string(x / 10.); });
   TF_ASSERT_OK(RunOpKernel());
   Tensor parse_output;
-  ParseSerializedOutput(GetOutput(0), &parse_output);
-  test::ExpectTensorEqual(parse_output, GetInput(0));
+  ParseSerializedOutput(GetOutput(0), &parse_output);
+  test::ExpectTensorEqual(parse_output, GetInput(0));
 }
 
 }  // namespace
diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc
index 8f7eff113c..5624d5cd1b 100644
--- a/tensorflow/core/kernels/segment_reduction_ops.cc
+++ b/tensorflow/core/kernels/segment_reduction_ops.cc
@@ -35,7 +35,6 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/util.h"
 
-
 #if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
 #include "tensorflow/core/kernels/cuda_solvers.h"
@@ -249,10 +248,11 @@ class SegmentSumGPUOp : public AsyncOpKernel {
 
     auto stream = context->op_device_context()->stream();
     OP_REQUIRES_ASYNC(
-        context, stream
-                     ->ThenMemcpy(output_rows_host.mutable_data(),
-                                  output_rows_device, sizeof(Index))
-                     .ok(),
+        context,
+        stream
+            ->ThenMemcpy(output_rows_host.mutable_data(), output_rows_device,
+                         sizeof(Index))
+            .ok(),
         errors::Internal(
             "SegmentSumGPUOp: failed to copy output_rows from device"),
         done);
diff --git a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc
index 26fcafee34..159fada621 100644
--- a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc
@@ -186,10 +186,10 @@ void SegmentSumFunctor::operator()(
       input_inner_dim_size * input_outer_dim_num_stripe;
 
   config = GetCudaLaunchConfig(total_stripe_count, d);
-  SortedSegmentSumCustomKernel<<<
-      config.block_count, config.thread_per_block, 0, d.stream()>>>(
-      input_outer_dim_size, input_inner_dim_size, output_rows,
-      segment_ids.data(), data, output.data(), total_stripe_count);
+  SortedSegmentSumCustomKernel
+      <<>>(
+          input_outer_dim_size, input_inner_dim_size, output_rows,
+          segment_ids.data(), data, output.data(), total_stripe_count);
 };
 
 // UnsortedSegmentSumFunctor implementation for GPUDevice.
diff --git a/tensorflow/core/kernels/sloppy_interleave_dataset_op.cc b/tensorflow/core/kernels/sloppy_interleave_dataset_op.cc
new file mode 100644
index 0000000000..d95f51f0f2
--- /dev/null
+++ b/tensorflow/core/kernels/sloppy_interleave_dataset_op.cc
@@ -0,0 +1,370 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/core/kernels/dataset.h"
+
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/kernels/dataset_utils.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
+#include "tensorflow/core/lib/random/random.h"
+
+#include "tensorflow/core/kernels/captured_function.h"
+
+namespace tensorflow {
+
+namespace {
+
+// See documentation in ../ops/dataset_ops.cc for a high-level
+// description of the following op.
+
+class SloppyInterleaveDatasetOp : public UnaryDatasetOpKernel {
+ public:
+  explicit SloppyInterleaveDatasetOp(OpKernelConstruction* ctx)
+      : UnaryDatasetOpKernel(ctx),
+        graph_def_version_(ctx->graph_def_version()) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_));
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
+  }
+
+  void MakeDataset(OpKernelContext* ctx, DatasetBase* input,
+                   DatasetBase** output) override {
+    OpInputList inputs;
+    OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs));
+    std::vector other_arguments;
+    other_arguments.reserve(inputs.size());
+    for (const Tensor& t : inputs) {
+      other_arguments.push_back(t);
+    }
+
+    int64 cycle_length;
+    OP_REQUIRES_OK(ctx,
+                   ParseScalarArgument(ctx, "cycle_length", &cycle_length));
+    OP_REQUIRES(ctx, cycle_length > 0,
+                errors::InvalidArgument("`cycle_length` must be > 0"));
+
+    int64 block_length;
+    OP_REQUIRES_OK(ctx,
+                   ParseScalarArgument(ctx, "block_length", &block_length));
+    OP_REQUIRES(ctx, block_length > 0,
+                errors::InvalidArgument("`block_length` must be > 0"));
+
+    std::unique_ptr captured_func;
+    OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_,
+                                                 std::move(other_arguments),
+                                                 &captured_func));
+
+    *output = new Dataset(input, std::move(captured_func), cycle_length,
+                          block_length, output_types_, output_shapes_);
+  }
+
+ private:
+  class Dataset : public DatasetBase {
+   public:
+    Dataset(const DatasetBase* input,
+            std::unique_ptr captured_func, int64 cycle_length,
+            int64 block_length, const DataTypeVector& output_types,
+            const std::vector& output_shapes)
+        : input_(input),
+          captured_func_(std::move(captured_func)),
+          cycle_length_(cycle_length),
+          block_length_(block_length),
+          output_types_(output_types),
+          output_shapes_(output_shapes) {
+      input_->Ref();
+    }
+
+    ~Dataset() override { input_->Unref(); }
+
+    std::unique_ptr MakeIterator(
+        const string& prefix) const override {
+      return std::unique_ptr(
+          new Iterator({this, strings::StrCat(prefix, "::SloppyInterleave")}));
+    }
+
+    const DataTypeVector& output_dtypes() const override {
+      return output_types_;
+    }
+    const std::vector& output_shapes() const override {
+      return output_shapes_;
+    }
+
+    string DebugString() override {
+      return "SloppyInterleaveDatasetOp::Dataset";
+    }
+
+   private:
+    class Iterator : public DatasetIterator {
+     public:
+      explicit Iterator(const Params& params)
+          : DatasetIterator(params),
+            input_impl_(params.dataset->input_->MakeIterator(params.prefix)),
+            output_elements_(params.dataset->cycle_length_) {}
+
+      ~Iterator() override {
+        mutex_lock l(mu_);
+        cancelled_ = true;
+        // Notify all workers in case they are blocked.
+        for (int64 i = 0; i < dataset()->cycle_length_; ++i) {
+          output_elements_[i].cond_var.notify_all();
+        }
+      }
+
+      // It is implemented so that it matches the deterministic interleave
+      // unless we would block waiting for an element, at which point it skips
+      // along to the next available value.
+      Status GetNextInternal(IteratorContext* ctx,
+                             std::vector* out_tensors,
+                             bool* end_of_sequence) override {
+        mutex_lock l(mu_);
+        TF_RETURN_IF_ERROR(EnsureWorkerThreadsStarted(ctx));
+        // Search for available items, blocking if necessary.
+        while (!cancelled_) {
+          for (size_t i = 0; i < dataset()->cycle_length_; ++i) {
+            size_t index = (next_index_ + i) % dataset()->cycle_length_;
+            if (output_elements_[index].is_produced) {
+              next_index_ = index;
+              if (i == 0) {
+                block_count_++;
+                if (block_count_ == dataset()->block_length_) {
+                  next_index_ = (index + 1) % dataset()->cycle_length_;
+                  block_count_ = 0;
+                }
+              } else {
+                block_count_ = 0;
+              }
+              // If we encounter an EoF, advance to the next iterator
+              if (output_elements_[index].end_of_sequence) {
+                output_elements_[index].is_produced = false;
+                output_elements_[index].cond_var.notify_one();
+                next_index_ = (index + 1) % dataset()->cycle_length_;
+                block_count_ = 0;
+                i = -1;  // Restart the inner loop
+                continue;
+              }
+              *end_of_sequence = false;
+              if (output_elements_[index].output_status.ok()) {
+                output_elements_[index].output_value.swap(*out_tensors);
+              }
+              output_elements_[index].is_produced = false;
+              output_elements_[index].cond_var.notify_one();
+              return output_elements_[index].output_status;
+            }
+          }
+
+          if (num_active_threads_ == 0) {
+            // No potential for future values.
+            //
+            // Note: this condition check must occur after checking the output
+            // buffer, as its possible for there to be values in the output
+            // buffer, even if the number of live threads is zero.
+            *end_of_sequence = true;
+            return Status::OK();
+          }
+          // No values available; wait until woken up.
+          cond_var_.wait(l);
+        }
+        return errors::Cancelled(
+            "SloppyInterleaveDatasetOp::Dataset::Iterator::GetNext");
+      }
+
+     private:
+      // Internal structure to manage thread coordination. All values are
+      // guarded by the enclosing Iterator's mu_.
+      struct OutputBufferElement {
+        // The producer must set `is_produced` to `true` after
+        // `output_status` or `output_value` has been written.
+        bool is_produced = false;
+        // The producer sets `output_status` if either getting the input element
+        // or applying the function to it fails.
+        Status output_status;
+        // Reached end of sequence for the underlying iterator.
+        bool end_of_sequence = false;
+        // The output data element.
+        std::vector output_value;
+        // The producer thread waits on this condition variable after having
+        // produced an element. The reader thread notifies this condition
+        // variable after reading the value.
+        condition_variable cond_var;
+      };
+
+      Status EnsureWorkerThreadsStarted(IteratorContext* ctx)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        if (worker_threads_.empty()) {
+          for (int64 i = 0; i < dataset()->cycle_length_; ++i) {
+            // Serialize the creation of the workers and their corresponding
+            // input elements to ensure we match the standard interleave when
+            // the underlying iterators induce no delay.
+            std::vector args;
+            TF_RETURN_IF_ERROR(
+                input_impl_->GetNext(ctx, &args, &end_of_input_));
+            if (end_of_input_) {
+              LOG(WARNING) << "Input iterator exhausted after " << i
+                           << " elements; cannot start all "
+                           << dataset()->cycle_length_ << " worker threads.";
+              return Status::OK();
+            }
+            std::unique_ptr itr;
+            TF_RETURN_IF_ERROR(dataset::MakeIteratorFromInputElement(
+                ctx, args, i, dataset()->captured_func_.get(), prefix(), &itr));
+            worker_threads_.emplace_back(
+                std::unique_ptr(ctx->env()->StartThread(
+                    {}, "worker_thread",
+                    std::bind(&Iterator::WorkerThread, this,
+                              new IteratorContext(*ctx), i, itr.release()))));
+            num_active_threads_ = i + 1;
+          }
+        }
+        return Status::OK();
+      }
+
+      void BlockAndUpdateOutputBuffer(mutex_lock* l, const int64 thread_index,
+                                      const Status& status,
+                                      bool end_of_sequence,
+                                      std::vector* out_tensors)
+          EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        // We have produced an element; push it into the output buffer
+        // when space is available.
+        while (!cancelled_ && output_elements_[thread_index].is_produced) {
+          output_elements_[thread_index].cond_var.wait(*l);
+        }
+        if (cancelled_) {
+          return;
+        }
+        output_elements_[thread_index].is_produced = true;
+        output_elements_[thread_index].output_status = status;
+        output_elements_[thread_index].end_of_sequence = end_of_sequence;
+        if (status.ok()) {
+          output_elements_[thread_index].output_value.swap(*out_tensors);
+        } else {
+          output_elements_[thread_index].output_value.clear();
+        }
+        cond_var_.notify_one();
+      }
+
+      // Races to produce elements into the output queue buffers.
+      void WorkerThread(IteratorContext* ctx_ptr, const int64 thread_index,
+                        IteratorBase* out_iterator_ptr) {
+        // std::function arguments are copy-constructable, so we pass raw
+        // pointers, and then immediately wrap them to ensure correct ownership.
+        std::unique_ptr ctx(ctx_ptr);
+        std::unique_ptr out_iterator(out_iterator_ptr);
+        auto cleanup = gtl::MakeCleanup([this, thread_index] {
+          mutex_lock l(mu_);
+          num_active_threads_--;
+          cond_var_.notify_all();
+        });
+        while (true) {
+          // Attempt to produce an element.
+          bool end_of_out_itr_input = false;
+          std::vector out_tensors;
+          Status element_status = out_iterator->GetNext(ctx.get(), &out_tensors,
+                                                        &end_of_out_itr_input);
+          // Handle output.
+          {
+            mutex_lock l(mu_);
+            BlockAndUpdateOutputBuffer(&l, thread_index, element_status,
+                                       end_of_out_itr_input, &out_tensors);
+            if (end_of_out_itr_input) {
+              // We have exhausted our current iterator; get a new iterator;
+              // loop to handle errors.
+              while (!cancelled_) {
+                if (end_of_input_) {
+                  // No more iterator inputs; we're done!
+                  return;
+                }
+                std::vector args;
+                // BlockAndUpdateOutputBuffer() sequences calls to
+                // input_impl_->GetNext when the out_iterator doesn't cause
+                // slopping.
+                Status input_status =
+                    input_impl_->GetNext(ctx.get(), &args, &end_of_input_);
+                if (end_of_input_) {
+                  // No more elements to produce, stop the worker thread.
+                  return;
+                }
+                if (input_status.ok()) {
+                  input_status = dataset::MakeIteratorFromInputElement(
+                      ctx.get(), args, thread_index,
+                      dataset()->captured_func_.get(), prefix(), &out_iterator);
+                }
+                if (input_status.ok()) {
+                  // Successfully have a new out_iterator; restart the outer
+                  // loop to produce an element.
+                  break;
+                }
+
+                // We encountered an error; push the error to the output buffer.
+                BlockAndUpdateOutputBuffer(&l, thread_index, input_status,
+                                           /* end_of_sequence = */ false,
+                                           &out_tensors);
+              }
+            }
+
+            // Check if we should exit.
+            if (cancelled_) {
+              return;
+            }
+          }
+        }
+      }
+
+      // Mutex & condition variable to guard mutable iterator internals and
+      // coordinate among worker threads and client thread[s].
+      mutex mu_;
+      condition_variable cond_var_;
+      // The iterator producing elements which are converted to datasets by
+      // the dataset()->captured_func_ then interleaved together.
+      const std::unique_ptr input_impl_ GUARDED_BY(mu_);
+      // Whether the input_impl_ can produce future elements.
+      bool end_of_input_ GUARDED_BY(mu_) = false;
+      // The buffer of elements to be produced. Each worker thread operates
+      // on a single OutputBufferElement.
+      std::vector output_elements_ GUARDED_BY(mu_);
+      // The index into output_elements_ for next element to produce.
+      size_t next_index_ GUARDED_BY(mu_) = 0;
+      // The number of items produced so far within the block
+      size_t block_count_ GUARDED_BY(mu_) = 0;
+      // Number of active threads.
+      size_t num_active_threads_ GUARDED_BY(mu_) = 0;
+      // Flag to instruct the worker threads to exit.
+      bool cancelled_ GUARDED_BY(mu_) = false;
+      // Pointers to the worker threads. This must be last to ensure the
+      // threads have exited before any other members are deallocated.
+      // TODO(b/65178177): Avoid allocating additional threads.
+      std::vector> worker_threads_ GUARDED_BY(mu_);
+    };
+
+    const DatasetBase* const input_;
+    const std::unique_ptr captured_func_;
+    const int64 cycle_length_;
+    const int64 block_length_;
+    const DataTypeVector output_types_;
+    const std::vector output_shapes_;
+  };
+
+  const int graph_def_version_;
+  DataTypeVector output_types_;
+  std::vector output_shapes_;
+  const NameAttrList* func_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("SloppyInterleaveDataset").Device(DEVICE_CPU),
+                        SloppyInterleaveDatasetOp);
+
+}  // namespace
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc
index d0eca0f1e7..cfa707de71 100644
--- a/tensorflow/core/kernels/summary_kernels.cc
+++ b/tensorflow/core/kernels/summary_kernels.cc
@@ -40,12 +40,7 @@ class CreateSummaryFileWriterOp : public OpKernel {
     SummaryWriterInterface* s;
     OP_REQUIRES_OK(ctx, CreateSummaryWriter(max_queue, flush_millis, logdir,
                                             filename_suffix, ctx->env(), &s));
-    Status status = CreateResource(ctx, HandleFromInput(ctx, 0), s);
-    if (!status.ok()) {
-      s->Unref();
-      ctx->SetStatus(status);
-      return;
-    }
+    OP_REQUIRES_OK(ctx, CreateResource(ctx, HandleFromInput(ctx, 0), s));
   }
 };
 REGISTER_KERNEL_BUILDER(Name("CreateSummaryFileWriter").Device(DEVICE_CPU),
diff --git a/tensorflow/core/lib/io/buffered_inputstream.cc b/tensorflow/core/lib/io/buffered_inputstream.cc
index 6f72da4713..b247e9c575 100644
--- a/tensorflow/core/lib/io/buffered_inputstream.cc
+++ b/tensorflow/core/lib/io/buffered_inputstream.cc
@@ -41,9 +41,18 @@ BufferedInputStream::~BufferedInputStream() {
 }
 
 Status BufferedInputStream::FillBuffer() {
+  if (!file_status_.ok()) {
+    pos_ = 0;
+    limit_ = 0;
+    return file_status_;
+  }
   Status s = input_stream_->ReadNBytes(size_, &buf_);
   pos_ = 0;
   limit_ = buf_.size();
+  if (buf_.empty()) {
+    DCHECK(!s.ok());
+    file_status_ = s;
+  }
   return s;
 }
 
@@ -82,6 +91,9 @@ Status BufferedInputStream::ReadNBytes(int64 bytes_to_read, string* result) {
                                    bytes_to_read);
   }
   result->clear();
+  if (!file_status_.ok() && bytes_to_read > 0) {
+    return file_status_;
+  }
   result->reserve(bytes_to_read);
 
   Status s;
@@ -91,6 +103,8 @@ Status BufferedInputStream::ReadNBytes(int64 bytes_to_read, string* result) {
       s = FillBuffer();
       // If we didn't read any bytes, we're at the end of the file; break out.
       if (limit_ == 0) {
+        DCHECK(!s.ok());
+        file_status_ = s;
         break;
       }
     }
@@ -124,6 +138,9 @@ Status BufferedInputStream::SkipNBytes(int64 bytes_to_skip) {
     Status s = input_stream_->SkipNBytes(bytes_to_skip - (limit_ - pos_));
     pos_ = 0;
     limit_ = 0;
+    if (errors::IsOutOfRange(s)) {
+      file_status_ = s;
+    }
     return s;
   }
   return Status::OK();
@@ -163,6 +180,7 @@ Status BufferedInputStream::ReadAll(string* result) {
   }
 
   if (errors::IsOutOfRange(status)) {
+    file_status_ = status;
     return Status::OK();
   }
   return status;
@@ -172,6 +190,7 @@ Status BufferedInputStream::Reset() {
   TF_RETURN_IF_ERROR(input_stream_->Reset());
   pos_ = 0;
   limit_ = 0;
+  file_status_ = Status::OK();
   return Status::OK();
 }
 
diff --git a/tensorflow/core/lib/io/buffered_inputstream.h b/tensorflow/core/lib/io/buffered_inputstream.h
index b37766005a..2b824f35f8 100644
--- a/tensorflow/core/lib/io/buffered_inputstream.h
+++ b/tensorflow/core/lib/io/buffered_inputstream.h
@@ -94,6 +94,9 @@ class BufferedInputStream : public InputStreamInterface {
   size_t pos_ = 0;    // current position in buf_.
   size_t limit_ = 0;  // just past the end of valid data in buf_.
   bool owns_input_stream_ = false;
+  // When EoF is reached, file_status_ contains the status to skip unnecessary
+  // buffer allocations.
+  Status file_status_ = Status::OK();
 
   TF_DISALLOW_COPY_AND_ASSIGN(BufferedInputStream);
 };
diff --git a/tensorflow/core/lib/io/buffered_inputstream_test.cc b/tensorflow/core/lib/io/buffered_inputstream_test.cc
index 7265101e1b..49b2b1a861 100644
--- a/tensorflow/core/lib/io/buffered_inputstream_test.cc
+++ b/tensorflow/core/lib/io/buffered_inputstream_test.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/lib/io/random_inputstream.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
 
 namespace tensorflow {
 namespace io {
@@ -362,6 +363,45 @@ TEST(BufferedInputStream, ReadAll_Text) {
   }
 }
 
+void BM_BufferedReaderSmallReads(const int iters, const int buff_size,
+                                 const int file_size) {
+  testing::StopTiming();
+  Env* env = Env::Default();
+  string fname = testing::TmpDir() + "/buffered_inputstream_test";
+
+  const string file_elem = "0123456789";
+  std::unique_ptr write_file;
+  TF_ASSERT_OK(env->NewWritableFile(fname, &write_file));
+  for (int i = 0; i < file_size; ++i) {
+    TF_ASSERT_OK(write_file->Append(file_elem));
+  }
+  TF_ASSERT_OK(write_file->Close());
+
+  std::unique_ptr file;
+  TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file));
+
+  string result;
+  testing::StartTiming();
+
+  for (int itr = 0; itr < iters; ++itr) {
+    BufferedInputStream in(file.get(), buff_size);
+    for (int64 i = 0; i < 10 * file_size; ++i) {
+      TF_ASSERT_OK(in.ReadNBytes(1, &result))
+          << "i: " << i << " itr: " << itr << " buff_size: " << buff_size
+          << " file size: " << file_size;
+    }
+  }
+}
+BENCHMARK(BM_BufferedReaderSmallReads)
+    ->ArgPair(1, 5)
+    ->ArgPair(1, 1024)
+    ->ArgPair(10, 5)
+    ->ArgPair(10, 1024)
+    ->ArgPair(1024, 1024)
+    ->ArgPair(1024 * 1024, 1024)
+    ->ArgPair(1024 * 1024, 1024 * 1024)
+    ->ArgPair(256 * 1024 * 1024, 1024);
+
 }  // anonymous namespace
 }  // namespace io
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/io/zlib_inputstream.h b/tensorflow/core/lib/io/zlib_inputstream.h
index a8a4e7c83c..8faa7dcb8f 100644
--- a/tensorflow/core/lib/io/zlib_inputstream.h
+++ b/tensorflow/core/lib/io/zlib_inputstream.h
@@ -37,7 +37,7 @@ namespace io {
 // by multiple threads
 class ZlibInputStream : public InputStreamInterface {
  public:
-  // Create a ZlibInputBuffer for `input_stream` with a buffer of size
+  // Create a ZlibInputStream for `input_stream` with a buffer of size
   // `input_buffer_bytes` bytes for reading contents from `input_stream` and
   // another buffer with size `output_buffer_bytes` for caching decompressed
   // contents. Does *not* take ownership of "input_stream".
diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc
index 651f22c6ea..62c86c7714 100644
--- a/tensorflow/core/ops/array_ops.cc
+++ b/tensorflow/core/ops/array_ops.cc
@@ -5488,24 +5488,28 @@ REGISTER_OP("BatchMatrixDiag")
     .Input("diagonal: T")
     .Output("output: T")
     .Attr("T: type")
-    .Deprecated(14, "Use MatrixDiag");
+    .Deprecated(14, "Use MatrixDiag")
+    .SetShapeFn(shape_inference::UnknownShape);
 REGISTER_OP("BatchMatrixSetDiag")
     .Input("input: T")
     .Input("diagonal: T")
     .Output("output: T")
     .Attr("T: type")
-    .Deprecated(14, "Use MatrixSetDiag");
+    .Deprecated(14, "Use MatrixSetDiag")
+    .SetShapeFn(shape_inference::UnknownShape);
 REGISTER_OP("BatchMatrixDiagPart")
     .Input("input: T")
     .Output("diagonal: T")
     .Attr("T: type")
-    .Deprecated(14, "Use MatrixDiagPart");
+    .Deprecated(14, "Use MatrixDiagPart")
+    .SetShapeFn(shape_inference::UnknownShape);
 REGISTER_OP("BatchMatrixBandPart")
     .Input("input: T")
     .Input("num_lower: int64")
     .Input("num_upper: int64")
     .Output("band: T")
     .Attr("T: type")
-    .Deprecated(14, "Use MatrixBandPart");
+    .Deprecated(14, "Use MatrixBandPart")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 22d4a0056f..a8338620d6 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -24390,6 +24390,21 @@ op {
     type: "type"
   }
 }
+op {
+  name: "SerializeTensor"
+  input_arg {
+    name: "tensor"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "serialized"
+    type: DT_STRING
+  }
+  attr {
+    name: "T"
+    type: "type"
+  }
+}
 op {
   name: "SetSize"
   input_arg {
@@ -24844,6 +24859,51 @@ op {
     }
   }
 }
+op {
+  name: "SloppyInterleaveDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "cycle_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "block_length"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "f"
+    type: "func"
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  is_stateful: true
+}
 op {
   name: "Softmax"
   input_arg {
@@ -28817,6 +28877,40 @@ op {
     }
   }
 }
+op {
+  name: "Sub"
+  input_arg {
+    name: "x"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "y"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "z"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Substr"
   input_arg {
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 37d9a737e2..7cc8dccb95 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -233,6 +233,33 @@ f: A function mapping elements of `input_dataset`, concatenated with
   `output_types` and `output_shapes`.
 )doc");
 
+REGISTER_OP("SloppyInterleaveDataset")
+    .Input("input_dataset: resource")
+    .Input("other_arguments: Targuments")
+    .Input("cycle_length: int64")
+    .Input("block_length: int64")
+    .Output("handle: resource")
+    .Attr("f: func")
+    .Attr("Targuments: list(type) >= 0")
+    .Attr("output_types: list(type) >= 1")
+    .Attr("output_shapes: list(shape) >= 1")
+    .SetShapeFn(shape_inference::ScalarShape)
+    .Doc(R"doc(
+Creates a dataset that applies `f` to the outputs of `input_dataset`.
+
+The resulting dataset is similar to the `InterleaveDataset`, with the exception
+that if retrieving the next value from a dataset would cause the requester to
+block, it will skip that input dataset. This dataset is especially useful
+when loading data from a variable-latency datastores (e.g. HDFS, GCS), as it
+allows the training step to proceed so long as some data is available.
+
+!! WARNING !! This dataset is not deterministic!
+
+f: A function mapping elements of `input_dataset`, concatenated with
+   `other_arguments`, to a Dataset resource that contains elements matching
+   `output_types` and `output_shapes`.
+)doc");
+
 REGISTER_OP("GroupByWindowDataset")
     .Input("input_dataset: resource")
     .Input("key_func_other_arguments: Tkey_func_other_arguments")
diff --git a/tensorflow/core/ops/debug_ops.cc b/tensorflow/core/ops/debug_ops.cc
index bd7f7c2c01..5aebdca1ea 100644
--- a/tensorflow/core/ops/debug_ops.cc
+++ b/tensorflow/core/ops/debug_ops.cc
@@ -32,6 +32,7 @@ REGISTER_OP("Copy")
     .Attr("tensor_name: string = ''")
     .Attr("debug_ops_spec: list(string) = []")
     .SetAllowsUninitializedInput()
+    .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Copy Op.
 
@@ -61,6 +62,7 @@ REGISTER_OP("CopyHost")
     .Attr("tensor_name: string = ''")
     .Attr("debug_ops_spec: list(string) = []")
     .SetAllowsUninitializedInput()
+    .SetShapeFn(shape_inference::UnchangedShape)
     .Doc(R"doc(
 Copy Host Op.
 
@@ -118,6 +120,7 @@ REGISTER_OP("DebugNanCount")
     .Attr("debug_urls: list(string) = []")
     .Attr("gated_grpc: bool = false")
     .SetAllowsUninitializedInput()
+    .SetShapeFn(shape_inference::ScalarShape)
     .Doc(R"doc(
 Debug NaN Value Counter Op
 
@@ -148,6 +151,8 @@ REGISTER_OP("DebugNumericSummary")
     .Attr("mute_if_healthy: bool = false")
     .Attr("gated_grpc: bool = false")
     .SetAllowsUninitializedInput()
+    // Note: this could return a more specific shape if needed in future.
+    .SetShapeFn(shape_inference::UnknownShape)
     .Doc(R"doc(
 Debug Numeric Summary Op.
 
diff --git a/tensorflow/core/ops/linalg_ops.cc b/tensorflow/core/ops/linalg_ops.cc
index 5b75bda1f1..48b2362342 100644
--- a/tensorflow/core/ops/linalg_ops.cc
+++ b/tensorflow/core/ops/linalg_ops.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/shape_inference.h"
 
@@ -557,34 +558,39 @@ REGISTER_OP("BatchSelfAdjointEig")
     .Input("input: T")
     .Output("output: T")
     .Attr("T: {double, float}")
-    .Deprecated(11, "Use SelfAdjointEigV2 instead.");
+    .Deprecated(11, "Use SelfAdjointEigV2 instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 // Can all be deleted after 9mar2017.
 REGISTER_OP("BatchMatrixDeterminant")
     .Input("input: T")
     .Output("output: T")
     .Attr("T: {float, double, complex64, complex128}")
-    .Deprecated(13, "Use MatrixDeterminant instead.");
+    .Deprecated(13, "Use MatrixDeterminant instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("BatchMatrixInverse")
     .Input("input: T")
     .Output("output: T")
     .Attr("adjoint: bool = False")
     .Attr("T: {double, float}")
-    .Deprecated(13, "Use MatrixInverse instead.");
+    .Deprecated(13, "Use MatrixInverse instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("BatchCholesky")
     .Input("input: T")
     .Output("output: T")
     .Attr("T: {double, float}")
-    .Deprecated(13, "Use Cholesky instead.");
+    .Deprecated(13, "Use Cholesky instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("BatchCholeskyGrad")
     .Input("l: T")
     .Input("grad: T")
     .Output("output: T")
     .Attr("T: {float, double}")
-    .Deprecated(13, "Use CholeskyGrad instead.");
+    .Deprecated(13, "Use CholeskyGrad instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("BatchSelfAdjointEigV2")
     .Input("input: T")
@@ -592,7 +598,8 @@ REGISTER_OP("BatchSelfAdjointEigV2")
     .Output("v: T")
     .Attr("compute_v: bool = True")
     .Attr("T: {double, float}")
-    .Deprecated(13, "Use SelfAdjointEigV2 instead.");
+    .Deprecated(13, "Use SelfAdjointEigV2 instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("BatchMatrixSolve")
     .Input("matrix: T")
@@ -600,7 +607,8 @@ REGISTER_OP("BatchMatrixSolve")
     .Output("output: T")
     .Attr("adjoint: bool = False")
     .Attr("T: {double, float}")
-    .Deprecated(13, "Use MatrixSolve instead.");
+    .Deprecated(13, "Use MatrixSolve instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("BatchMatrixTriangularSolve")
     .Input("matrix: T")
@@ -609,7 +617,8 @@ REGISTER_OP("BatchMatrixTriangularSolve")
     .Attr("lower: bool = True")
     .Attr("adjoint: bool = False")
     .Attr("T: {double, float}")
-    .Deprecated(13, "Use MatrixTriangularSolve instead.");
+    .Deprecated(13, "Use MatrixTriangularSolve instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("BatchMatrixSolveLs")
     .Input("matrix: T")
@@ -618,7 +627,8 @@ REGISTER_OP("BatchMatrixSolveLs")
     .Output("output: T")
     .Attr("T: {double, float}")
     .Attr("fast: bool = True")
-    .Deprecated(13, "Use MatrixSolveLs instead.");
+    .Deprecated(13, "Use MatrixSolveLs instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 REGISTER_OP("BatchSvd")
     .Input("input: T")
@@ -628,6 +638,7 @@ REGISTER_OP("BatchSvd")
     .Attr("compute_uv: bool = True")
     .Attr("full_matrices: bool = False")
     .Attr("T: {double, float, complex64, complex128}")
-    .Deprecated(13, "Use Svd instead.");
+    .Deprecated(13, "Use Svd instead.")
+    .SetShapeFn(shape_inference::UnknownShape);
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 35c31c6cb8..cfd3869d05 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -24034,6 +24034,25 @@ op {
   }
   summary: "Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object."
 }
+op {
+  name: "SerializeTensor"
+  input_arg {
+    name: "tensor"
+    description: "A Tensor of type `T`."
+    type_attr: "T"
+  }
+  output_arg {
+    name: "serialized"
+    description: "A serialized TensorProto proto of the input tensor."
+    type: DT_STRING
+  }
+  attr {
+    name: "T"
+    type: "type"
+    description: "The type of the input tensor."
+  }
+  summary: "Transforms a Tensor into a serialized TensorProto proto."
+}
 op {
   name: "SetSize"
   input_arg {
@@ -24535,6 +24554,54 @@ op {
   summary: "Return a slice from \'input\'."
   description: "The output tensor is a tensor with dimensions described by \'size\'\nwhose values are extracted from \'input\' starting at the offsets in\n\'begin\'.\n\n*Requirements*:\n  0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)"
 }
+op {
+  name: "SloppyInterleaveDataset"
+  input_arg {
+    name: "input_dataset"
+    type: DT_RESOURCE
+  }
+  input_arg {
+    name: "other_arguments"
+    type_list_attr: "Targuments"
+  }
+  input_arg {
+    name: "cycle_length"
+    type: DT_INT64
+  }
+  input_arg {
+    name: "block_length"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "handle"
+    type: DT_RESOURCE
+  }
+  attr {
+    name: "f"
+    type: "func"
+    description: "A function mapping elements of `input_dataset`, concatenated with\n`other_arguments`, to a Dataset resource that contains elements matching\n`output_types` and `output_shapes`."
+  }
+  attr {
+    name: "Targuments"
+    type: "list(type)"
+    has_minimum: true
+  }
+  attr {
+    name: "output_types"
+    type: "list(type)"
+    has_minimum: true
+    minimum: 1
+  }
+  attr {
+    name: "output_shapes"
+    type: "list(shape)"
+    has_minimum: true
+    minimum: 1
+  }
+  summary: "Creates a dataset that applies `f` to the outputs of `input_dataset`."
+  description: "The resulting dataset is similar to the `InterleaveDataset`, with the exception\nthat if retrieving the next value from a dataset would cause the requester to\nblock, it will skip that input dataset. This dataset is especially useful\nwhen loading data from a variable-latency datastores (e.g. HDFS, GCS), as it\nallows the training step to proceed so long as some data is available.\n\n!! WARNING !! This dataset is not deterministic!"
+  is_stateful: true
+}
 op {
   name: "Softmax"
   input_arg {
@@ -28908,6 +28975,10 @@ op {
         type: DT_HALF
         type: DT_FLOAT
         type: DT_DOUBLE
+        type: DT_UINT8
+        type: DT_INT8
+        type: DT_UINT16
+        type: DT_INT16
         type: DT_INT32
         type: DT_INT64
         type: DT_COMPLEX64
diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h
index 04ff9e12b6..d5f7350cdd 100644
--- a/tensorflow/core/platform/default/logging.h
+++ b/tensorflow/core/platform/default/logging.h
@@ -86,7 +86,7 @@ class LogMessageFatal : public LogMessage {
   ((lvl) <= ::tensorflow::internal::LogMessage::MinVLogLevel())
 #endif
 
-#define VLOG(lvl)      \
+#define VLOG(lvl)                        \
   if (TF_PREDICT_FALSE(VLOG_IS_ON(lvl))) \
   ::tensorflow::internal::LogMessage(__FILE__, __LINE__, tensorflow::INFO)
 
diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index 50dd0cd58b..c9b362f182 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -226,14 +226,28 @@ TEST_F(DefaultEnvTest, RecursivelyCreateDirSubdirsExist) {
 
 TEST_F(DefaultEnvTest, LocalFileSystem) {
   // Test filename with file:// syntax.
+  int expected_num_files = 0;
+  std::vector matching_paths;
   for (const int length : {0, 1, 1212, 2553, 4928, 8196, 9000, (1 << 20) - 1,
                            1 << 20, (1 << 20) + 1}) {
-    string filename = io::JoinPath(BaseDir(), strings::StrCat("file", length));
+    string filename = io::JoinPath(BaseDir(), strings::StrCat("len", length));
 
     filename = strings::StrCat("file://", filename);
 
     // Write a file with the given length
     const string input = CreateTestFile(env_, filename, length);
+    ++expected_num_files;
+
+    // Ensure that GetMatchingPaths works as intended.
+    TF_EXPECT_OK(env_->GetMatchingPaths(
+        // Try it with the "file://" URI scheme.
+        strings::StrCat("file://", io::JoinPath(BaseDir(), "l*")),
+        &matching_paths));
+    EXPECT_EQ(expected_num_files, matching_paths.size());
+    TF_EXPECT_OK(env_->GetMatchingPaths(
+        // Try it without any URI scheme.
+        io::JoinPath(BaseDir(), "l*"), &matching_paths));
+    EXPECT_EQ(expected_num_files, matching_paths.size());
 
     // Read the file back and check equality
     string output;
diff --git a/tensorflow/core/util/activation_mode.cc b/tensorflow/core/util/activation_mode.cc
index 4bf947a0a9..efb5ab146a 100644
--- a/tensorflow/core/util/activation_mode.cc
+++ b/tensorflow/core/util/activation_mode.cc
@@ -22,7 +22,9 @@ namespace tensorflow {
 
 Status GetActivationModeFromString(const string& str_value,
                                    ActivationMode* value) {
-  if (str_value == "Sigmoid") {
+  if (str_value == "None") {
+    *value = NONE;
+  } else if (str_value == "Sigmoid") {
     *value = SIGMOID;
   } else if (str_value == "Relu") {
     *value = RELU;
diff --git a/tensorflow/core/util/activation_mode.h b/tensorflow/core/util/activation_mode.h
index 2a8564847d..2e03ccd5c8 100644
--- a/tensorflow/core/util/activation_mode.h
+++ b/tensorflow/core/util/activation_mode.h
@@ -28,6 +28,7 @@ namespace tensorflow {
 
 // ActivationMode: the activation function we apply to the input tensor:
 enum ActivationMode {
+  NONE = 0,
   SIGMOID = 1,
   RELU = 2,
   RELU6 = 3,
diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md
index ba26bd5e94..aaebabfddf 100644
--- a/tensorflow/docs_src/programmers_guide/datasets.md
+++ b/tensorflow/docs_src/programmers_guide/datasets.md
@@ -146,6 +146,9 @@ for i in range(100):
   assert i == value
 ```
 
+Note: Currently, one-shot iterators are the only type that is easily usable
+with an `Estimator`.
+
 An **initializable** iterator requires you to run an explicit
 `iterator.initializer` operation before using it. In exchange for this
 inconvenience, it enables you to *parameterize* the definition of the dataset,
@@ -452,6 +455,9 @@ dataset = dataset.flat_map(
         .filter(lambda line: tf.not_equal(tf.substr(line, 0, 1), "#"))))
 ```
 
+For a full example of parsing a CSV file using datasets, see [`imports85.py`](https://www.tensorflow.org/code/tensorflow/examples/get_started/regression/imports85.py)
+in @{$get_started/linear_regression}.
+