From daa76e16e05c2b7a3521bf739670903d996d9a33 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 18 May 2018 11:37:53 -0700 Subject: [PATCH 001/380] enhancement with relu primitive reuse --- tensorflow/core/kernels/mkl_conv_ops.cc | 280 +++++----- tensorflow/core/kernels/mkl_relu_op.cc | 661 +++++++++++++++++++----- tensorflow/core/util/mkl_util.h | 32 +- 3 files changed, 702 insertions(+), 271 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index f2b14f1278..c032add82e 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,7 +59,8 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -struct ConvFwdDimensions { +// This structure aggregates multiple inputs to Conv2DFwd* methods. +struct MklConvFwdParams { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -69,7 +70,7 @@ struct ConvFwdDimensions { memory::dims padding_left; memory::dims padding_right; - ConvFwdDimensions(memory::dims src_dims, + MklConvFwdParams(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -82,35 +83,40 @@ struct ConvFwdDimensions { }; template -class Conv2DFwd : public DnnOp { +class MklConv2DFwdPrimitive: public MklPrimitive { public: - explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { - fwd_stream_.reset(new stream(stream::kind::eager)); + explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { + context_.fwd_stream.reset(new stream(stream::kind::eager)); // create conv primitive - if (conv_fwd_ == nullptr) { + if (context_.conv_fwd == nullptr) { Setup(convFwdDims); } } - ~Conv2DFwd() {} + ~MklConv2DFwdPrimitive() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data 
buffer of dst - void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - bias_mem_->set_data_handle(static_cast(bias_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); + void Execute(const T* src_data, const T* filter_data, + const T* bias_data, const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.bias_mem->set_data_handle( + static_cast(const_cast(bias_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + context_.fwd_stream->submit(context_.fwd_primitives); // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - bias_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.bias_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); return; } @@ -119,139 +125,174 @@ class Conv2DFwd : public DnnOp { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(T* src_data, T* filter_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); - - // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + void Execute(const T* src_data, const T* filter_data, + const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + 
context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + context_.fwd_stream->submit(context_.fwd_primitives); + + // after execution, set data handle back + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); return; } - // expected memory format for this primitive instance - memory::format src_fmt_; - memory::format filter_fmt_; + memory::format GetSrcMemoryFormat() const { + return context_.src_fmt; + } + + memory::format GetFilterMemoryFormat() const { + return context_.filter_fmt; + } - // convolution primitive - std::shared_ptr fwd_pd_; - std::shared_ptr conv_fwd_; + std::shared_ptr + GetPrimitiveDesc() const { + return context_.fwd_pd; + } private: - void Setup(const ConvFwdDimensions& convFwdDims) { + // Primitive reuse context for Conv2D Fwd op + struct ConvFwdContext { + // expected memory format for this primitive instance + memory::format src_fmt; + memory::format filter_fmt; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr filter_mem; + std::shared_ptr bias_mem; + std::shared_ptr dst_mem; + + // desc & prmitive desc + std::shared_ptr fwd_desc; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr filter_md; + std::shared_ptr bias_md; + std::shared_ptr dst_md; + + // convolution primitive + std::shared_ptr fwd_pd; + std::shared_ptr conv_fwd; + + std::shared_ptr fwd_stream; + std::vector fwd_primitives; + + ConvFwdContext() : + src_fmt(memory::format::any), filter_fmt(memory::format::any), + src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), + dst_mem(nullptr), fwd_desc(nullptr), + src_md(nullptr), filter_md(nullptr), bias_md(nullptr), + fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { + } + } context_; + + engine cpu_engine_ = engine(engine::cpu, 0); + + void Setup(const MklConvFwdParams& convFwdDims) { // create memory 
descriptors for convolution data w/ no specified format - src_md_.reset(new memory::desc({convFwdDims.src_dims}, + context_.src_md.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, + context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, + context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, + context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, MklDnnType(), memory::format::any)); // create a convolution if (!convFwdDims.bias_dims.empty()) { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.bias_md, *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *dst_md_, - convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, - convFwdDims.padding_right, padding_kind::zero)); + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, + convFwdDims.padding_left, convFwdDims.padding_right, + padding_kind::zero)); } - fwd_pd_.reset(new convolution_forward::primitive_desc( - *fwd_desc_, cpu_engine_)); + context_.fwd_pd.reset(new convolution_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); // store the expected memory format - 
src_fmt_ = static_cast( - fwd_pd_.get()->src_primitive_desc().desc().data.format); + context_.src_fmt = static_cast( + context_.fwd_pd.get()->src_primitive_desc().desc().data.format); - filter_fmt_ = static_cast( - fwd_pd_.get()->weights_primitive_desc().desc().data.format); + context_.filter_fmt = static_cast( + context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); - filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), - DummyData)); - dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); + context_.src_mem.reset(new memory( + context_.fwd_pd.get()->src_primitive_desc(), DummyData)); + context_.filter_mem.reset(new memory( + context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); + context_.dst_mem.reset(new memory( + context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), - memory::format::x}, cpu_engine_}, DummyData)); - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *bias_mem_, *dst_mem_)); + context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, + MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, + *context_.bias_mem, *context_.dst_mem)); } else { - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *dst_mem_)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, + *context_.filter_mem, *context_.dst_mem)); } - fwd_primitives_.push_back(*conv_fwd_); + context_.fwd_primitives.push_back(*context_.conv_fwd); return; } - - // MKLDNN memory - std::shared_ptr src_mem_; - std::shared_ptr filter_mem_; - 
std::shared_ptr bias_mem_; - std::shared_ptr dst_mem_; - - std::shared_ptr fwd_stream_; - std::vector fwd_primitives_; - - // desc & prmitive desc - std::shared_ptr fwd_desc_; - - // memory desc - std::shared_ptr src_md_; - std::shared_ptr filter_md_; - std::shared_ptr bias_md_; - std::shared_ptr dst_md_; - - engine cpu_engine_ = engine(engine::cpu, 0); }; template -class Conv2DFwdFactory : public DnnOpFactory { +class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { public: - static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { - Conv2DFwd* conv2d_fwd = nullptr; + static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { + MklConv2DFwdPrimitive* conv2d_fwd = nullptr; // try to find a suitable one in pool - conv2d_fwd = dynamic_cast*> ( - Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( + convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new Conv2DFwd(convFwdDims); - Conv2DFwdFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); + MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - Conv2DFwdFactory() {} - ~Conv2DFwdFactory() {} + MklConv2DFwdPrimitiveFactory() {} + ~MklConv2DFwdPrimitiveFactory() {} static const int kDilationH = 0, kDilationW = 1; - static Conv2DFwdFactory& GetInstance() { - static Conv2DFwdFactory instance_; + static MklConv2DFwdPrimitiveFactory& GetInstance() { + static MklConv2DFwdPrimitiveFactory instance_; return instance_; } - static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { + static std::string CreateKey(const MklConvFwdParams& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -266,12 +307,12 @@ class Conv2DFwdFactory : public DnnOpFactory { return key_creator.GetKey(); } - DnnOp* GetConv2DFwd(const 
ConvFwdDimensions& convFwdDims) { + MklPrimitive* GetConv2DFwd(const MklConvFwdParams& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { + void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -762,7 +803,6 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); - MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -812,7 +852,6 @@ class MklConv2DOp : public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); - src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -820,29 +859,28 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); - filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. 
dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - Conv2DFwd *conv2d_fwd = nullptr; + MklConv2DFwdPrimitive *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, + MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } else { - ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, + MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->fwd_pd_; + conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -854,20 +892,30 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - if (src_md.data.format != conv2d_fwd->src_fmt_) - src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), &net); - - if (filter_md.data.format != conv2d_fwd->filter_fmt_) - filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); + T *src_data = nullptr; + if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { + src.SetUsrMem(src_md, &src_tensor); + src.CheckReorderToOpMem( + conv_fwd_pd.get()->src_primitive_desc(), &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast(const_cast( + src_tensor.flat().data())); + } + T 
*filter_data = nullptr; + if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { + filter.SetUsrMem(filter_md, &filter_tensor); + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); + filter_data = static_cast(filter.GetOpMem().get_data_handle()); + } else { + filter_data = static_cast(const_cast( + filter_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); - T* src_data = static_cast( - src.GetOpMem().get_data_handle()); - T* filter_data = static_cast( - filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 1ed43834dd..048d4883b2 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -25,6 +25,7 @@ limitations under the License. #include "mkl_dnn.h" #include "mkl_dnn_types.h" +#include "tensorflow/core/platform/default/logging.h" #include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML @@ -38,10 +39,406 @@ using mkldnn::prop_kind; using mkldnn::relu_backward; using mkldnn::relu_forward; using mkldnn::stream; +using mkldnn::memory; #endif namespace tensorflow { +#ifndef INTEL_MKL_ML + +template +class MklEltwiseFwdParams { + public: + memory::dims src_dims; // check if this is needed + memory::desc src_md; + algorithm alg_kind; + T alpha; + T beta; + + MklEltwiseFwdParams(memory::dims src_dims, memory::desc src_md, + algorithm alg_kind, T alpha, T beta) : + src_dims(src_dims), src_md(src_md), + alg_kind(alg_kind), alpha(alpha), beta(beta) { + } +}; + +template +class MklEltwiseFwdPrimitive : public MklPrimitive { + public: + explicit MklEltwiseFwdPrimitive(const MklEltwiseFwdParams& fwdParams) { + // store expected format + context_.src_fmt = static_cast( + fwdParams.src_md.data.format); + context_.fwd_stream.reset(new stream(stream::kind::eager)); + + // create eltwise primitive 
+ if (context_.eltwise_fwd == nullptr) { + Setup(fwdParams); + } + } + + ~MklEltwiseFwdPrimitive() {} + + // Eltwise forward execute + // src_data: input data buffer of src + // dst_data: output data buffer of dst + void Execute(T* src_data, T* dst_data) { + context_.src_mem->set_data_handle(static_cast(src_data)); + context_.dst_mem->set_data_handle(static_cast(dst_data)); + context_.fwd_stream->submit(context_.fwd_primitives); + + // after execution, set data handle back + context_.src_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); + return; + } + + std::shared_ptr GetEltwiseFwdPd() { + return context_.fwd_pd; + } + + memory::format GetSrcMemoryFormat() { + return context_.src_fmt; + } + + private: + // Primitive reuse context for eltwise Fwd ops: Relu, Elu, Tanh + struct EltwiseFwdContext { + // expected memory format for this primitive instance + mkldnn::memory::format src_fmt; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr dst_mem; + + // desc & prmitive desc + std::shared_ptr fwd_desc; + std::shared_ptr fwd_pd; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr dst_md; + + // memory primitive desc + std::shared_ptr src_mpd; + + // Eltwise primitive + std::shared_ptr eltwise_fwd; + + std::shared_ptr fwd_stream; + std::vector fwd_primitives; + + EltwiseFwdContext() : + src_fmt(memory::format::any), src_mem(nullptr), dst_mem(nullptr), + fwd_desc(nullptr), fwd_pd(nullptr), src_md(nullptr), dst_md(nullptr), + src_mpd(nullptr), eltwise_fwd(nullptr), fwd_stream(nullptr) { + } + } context_; + + // Eltwise forward primitive setup + void Setup(const MklEltwiseFwdParams& fwdParams) { + // create memory descriptors for eltwise data with specified format + context_.src_md.reset(new memory::desc(fwdParams.src_md.data)); + context_.src_mpd.reset(new memory::primitive_desc( + *context_.src_md, cpu_engine_)); + + // create a eltwise + context_.fwd_desc.reset(new mkldnn::eltwise_forward::desc( + 
prop_kind::forward, fwdParams.alg_kind, *context_.src_md, + fwdParams.alpha, fwdParams.beta)); + context_.fwd_pd.reset(new mkldnn::eltwise_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); + + // create memory primitive based on dummy data + context_.src_mem.reset(new memory(*context_.src_mpd, DummyData)); + context_.dst_mem.reset(new memory( + context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); + + // create eltwise primitive and add it to net + context_.eltwise_fwd.reset(new mkldnn::eltwise_forward(*context_.fwd_pd, + *context_.src_mem, *context_.dst_mem)); + + context_.fwd_primitives.push_back(*context_.eltwise_fwd); + return; + } + + engine cpu_engine_ = engine(engine::cpu, 0); +}; + +template +class MklEltwiseFwdPrimitiveFactory : public MklPrimitiveFactory { + public: + static MklEltwiseFwdPrimitive* Get( + const MklEltwiseFwdParams& fwdParams) { + MklEltwiseFwdPrimitive* eltwise_forward = nullptr; + + auto src_fmt = static_cast( + fwdParams.src_md.data.format); + + // Get a eltwise fwd primitive from the cached pool + eltwise_forward = static_cast*>( + MklEltwiseFwdPrimitiveFactory::GetInstance().GetEltwiseFwd( + fwdParams, src_fmt)); + if (eltwise_forward == nullptr) { + eltwise_forward = new MklEltwiseFwdPrimitive(fwdParams); + MklEltwiseFwdPrimitiveFactory::GetInstance().SetEltwiseFwd( + fwdParams, src_fmt, eltwise_forward); + } + return eltwise_forward; + } + + static MklEltwiseFwdPrimitiveFactory& GetInstance() { + static MklEltwiseFwdPrimitiveFactory instance_; + return instance_; + } + + private: + MklEltwiseFwdPrimitiveFactory() {} + ~MklEltwiseFwdPrimitiveFactory() {} + + static std::string CreateKey( + const MklEltwiseFwdParams& fwdParams, memory::format src_fmt) { + std::string prefix = "eltwise_fwd"; + FactoryKeyCreator key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(fwdParams.src_dims); + key_creator.AddAsKey(static_cast(fwdParams.alg_kind)); + key_creator.AddAsKey(static_cast(fwdParams.alpha)); + 
key_creator.AddAsKey(static_cast(fwdParams.beta)); + key_creator.AddAsKey(static_cast(src_fmt)); + return key_creator.GetKey(); + } + + MklPrimitive* GetEltwiseFwd(const MklEltwiseFwdParams& fwdParams, + memory::format src_fmt) { + std::string key = CreateKey(fwdParams, src_fmt); + return this->GetOp(key); + } + + void SetEltwiseFwd(const MklEltwiseFwdParams& fwdParams, + memory::format src_fmt, MklPrimitive* op) { + std::string key = CreateKey(fwdParams, src_fmt); + this->SetOp(key, op); + } +}; + +template +class MklEltwiseBwdParams { + public: + memory::dims src_dims; + memory::desc common_md; + algorithm alg_kind; + T alpha; + T beta; + + MklEltwiseBwdParams(const memory::dims &src_dims, + const memory::desc &common_md, + algorithm alg_kind, T alpha, T beta) : + src_dims(src_dims), common_md(common_md), + alg_kind(alg_kind), alpha(alpha), beta(beta) { + } +}; + +template +class MklEltwiseBwdPrimitive : public MklPrimitive { + public: + explicit MklEltwiseBwdPrimitive(const MklEltwiseBwdParams& bwdParams) { + context_.src_fmt = static_cast( + bwdParams.common_md.data.format); + context_.diff_dst_fmt = static_cast( + bwdParams.common_md.data.format); + context_.bwd_stream.reset(new stream(stream::kind::eager)); + // create eltwise primitive + if (context_.eltwise_bwd == nullptr) { + Setup(bwdParams); + } + } + + ~MklEltwiseBwdPrimitive() {} + + // Eltwise backward execute + // src_data: input data buffer of src + // diff_dst_data: input data buffer of diff_dst + // diff_src_data: output data buffer of diff_src + + void Execute(T* src_data, T* diff_dst_data, T* diff_src_data) { + context_.src_mem->set_data_handle(static_cast(src_data)); + context_.diff_dst_mem->set_data_handle(static_cast(diff_dst_data)); + context_.diff_src_mem->set_data_handle(static_cast(diff_src_data)); + context_.bwd_stream->submit(context_.bwd_primitives); + + // after execution, set data handle back + context_.src_mem->set_data_handle(DummyData); + 
context_.diff_dst_mem->set_data_handle(DummyData); + context_.diff_src_mem->set_data_handle(DummyData); + return; + } + + std::shared_ptr GetEltwiseBwdPd() { + return context_.bwd_pd; + } + + memory::format GetSrcMemoryFormat() { + return context_.src_fmt; + } + + memory::format GetDiffDstMemoryFormat() { + return context_.diff_dst_fmt; + } + + private: + // Primitive reuse context for eltwise Bwd ops: Relu, Elu, Tanh + struct EltwiseBwdContext { + // expected memory format for this primitive instance + memory::format src_fmt; + memory::format diff_dst_fmt; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr diff_dst_mem; + std::shared_ptr diff_src_mem; + + // desc & prmitive desc + std::shared_ptr bwd_desc; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr diff_dst_md; + std::shared_ptr common_md; + + // memory primitive desc + std::shared_ptr src_mpd; + std::shared_ptr diff_dst_mpd; + + // fwd primitive desc + std::shared_ptr fwd_desc; + std::shared_ptr fwd_pd; + std::shared_ptr bwd_pd; + + // Eltwise primitive + std::shared_ptr eltwise_bwd; + + std::shared_ptr bwd_stream; + std::vector bwd_primitives; + + EltwiseBwdContext() : + src_fmt(memory::format::any), diff_dst_fmt(memory::format::any), + src_mem(nullptr), diff_dst_mem(nullptr), diff_src_mem(nullptr), + src_md(nullptr), diff_dst_md(nullptr), common_md(nullptr), + src_mpd(nullptr), diff_dst_mpd(nullptr), + fwd_desc(nullptr), fwd_pd(nullptr), bwd_pd(nullptr), + eltwise_bwd(nullptr), bwd_stream(nullptr) { + } + } context_; + + // Eltwise backward primitive setup + void Setup(const MklEltwiseBwdParams& bwdParams) { + // create memory descriptors for eltwise data w/ no specified format + context_.src_md.reset(new memory::desc(bwdParams.common_md.data)); + context_.diff_dst_md.reset(new memory::desc(bwdParams.common_md.data)); + + context_.src_mpd.reset(new memory::primitive_desc( + *context_.src_md, cpu_engine_)); + context_.diff_dst_mpd.reset(new memory::primitive_desc( + 
*context_.diff_dst_md, cpu_engine_)); + + // create forward eltwise primitive + context_.fwd_desc.reset(new mkldnn::eltwise_forward::desc( + prop_kind::forward_training, bwdParams.alg_kind, + *context_.src_md, bwdParams.alpha, bwdParams.beta)); + context_.fwd_pd.reset(new mkldnn::eltwise_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); + context_.bwd_desc.reset(new mkldnn::eltwise_backward::desc( + bwdParams.alg_kind, *context_.diff_dst_md, + *context_.src_md, bwdParams.alpha, bwdParams.beta)); + context_.bwd_pd.reset(new mkldnn::eltwise_backward::primitive_desc( + *context_.bwd_desc, cpu_engine_, *context_.fwd_pd)); + + // create memory primitive based on dummy data + context_.src_mem.reset(new memory(*context_.src_mpd, DummyData)); + context_.diff_dst_mem.reset(new memory(*context_.diff_dst_mpd, DummyData)); + context_.diff_src_mem.reset(new memory( + context_.bwd_pd.get()->diff_src_primitive_desc(), DummyData)); + + // create eltwise primitive and add it to net + context_.eltwise_bwd.reset(new mkldnn::eltwise_backward(*context_.bwd_pd, + *context_.src_mem, *context_.diff_dst_mem, *context_.diff_src_mem)); + + context_.bwd_primitives.push_back(*context_.eltwise_bwd); + return; + } + + engine cpu_engine_ = engine(engine::cpu, 0); +}; + + +template +class MklEltwiseBwdPrimitiveFactory : public MklPrimitiveFactory { + private: + MklEltwiseBwdPrimitiveFactory() {} + ~MklEltwiseBwdPrimitiveFactory() {} + + public: + static MklEltwiseBwdPrimitive* Get( + const MklEltwiseBwdParams& bwdParams) { + MklEltwiseBwdPrimitive* eltwise_backward = nullptr; + + auto src_fmt = static_cast( + bwdParams.common_md.data.format); + auto diff_dst_fmt = static_cast( + bwdParams.common_md.data.format); + + // try to find a suitable one in pool + eltwise_backward = static_cast*> ( + MklEltwiseBwdPrimitiveFactory::GetInstance().GetEltwiseBwd( + bwdParams, src_fmt, diff_dst_fmt)); + + if (eltwise_backward == nullptr) { + eltwise_backward = new MklEltwiseBwdPrimitive(bwdParams); 
+ MklEltwiseBwdPrimitiveFactory::GetInstance().SetEltwiseBwd( + bwdParams, src_fmt, diff_dst_fmt, eltwise_backward); + } + return eltwise_backward; + } + + static MklEltwiseBwdPrimitiveFactory& GetInstance() { + static MklEltwiseBwdPrimitiveFactory instance_; + return instance_; + } + + private: + static std::string CreateKey( + const MklEltwiseBwdParams& bwdParams, + const memory::format &src_fmt, + const memory::format &diff_dst_fmt) { + std::string prefix = "eltwise_bwd"; + FactoryKeyCreator key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(bwdParams.src_dims); + key_creator.AddAsKey(static_cast(bwdParams.alg_kind)); + key_creator.AddAsKey(static_cast(bwdParams.alpha)); + key_creator.AddAsKey(static_cast(bwdParams.beta)); + key_creator.AddAsKey(static_cast(src_fmt)); + key_creator.AddAsKey(static_cast(diff_dst_fmt)); + return key_creator.GetKey(); + } + + MklPrimitive* GetEltwiseBwd(const MklEltwiseBwdParams& bwdParams, + const memory::format &src_fmt, const memory::format &diff_dst_fmt) { + std::string key = CreateKey(bwdParams, src_fmt, diff_dst_fmt); + return this->GetOp(key); + } + + void SetEltwiseBwd(const MklEltwiseBwdParams& bwdParams, + const memory::format &src_fmt, + const memory::format &diff_dst_fmt, MklPrimitive *op) { + std::string key = CreateKey(bwdParams, src_fmt, diff_dst_fmt); + this->SetOp(key, op); + } +}; + +#endif + typedef Eigen::ThreadPoolDevice CPUDevice; struct MklReluHelpers { @@ -367,104 +764,111 @@ void MklReluGradOp::Compute(OpKernelContext* context) { mkl_context.MklCleanup(); } - - #else // INTEL_MKL_ML - template class MklReluOpBase : public OpKernel { public: ~MklReluOpBase() {} explicit MklReluOpBase(OpKernelConstruction* context) : OpKernel(context) {} - virtual void Compute_Scalar(OpKernelContext* context) = 0; void Compute(OpKernelContext* context) override { try { - auto cpu_engine = engine(engine::cpu, 0); const size_t src_index = 0; // index of src input tensor const size_t dst_index = 0; // index of 
dst output tensor const Tensor& src_tensor = MklGetInput(context, src_index); MklDnnShape dnn_shape_src; GetMklShape(context, src_index, &dnn_shape_src); - Tensor* dst_tensor = nullptr; if (src_tensor.dims() == 0) { - Compute_Scalar(context); // scalar case doesn't use in-place operation + Compute_Scalar(context); return; } - // Create relu primitive. - MklDnnData src(&cpu_engine); - MklDnnData dst(&cpu_engine); - // Set DNN primitive - src + MklDnnData src(&cpu_engine); + memory::dims src_dims; memory::desc src_md({}, memory::data_undef, memory::format_undef); if (dnn_shape_src.IsMklTensor()) { src_md = dnn_shape_src.GetMklLayout(); + src_dims = dnn_shape_src.GetSizesAsMklDnnDims(); } else { - auto src_dims = TFShapeToMklDnnDims(src_tensor.shape()); + src_dims = TFShapeToMklDnnDims(src_tensor.shape()); auto src_strides = CalculateTFStrides(src_dims); // Create blocked memory descriptor src_md = MklDnnData::CreateBlockedMemDesc(src_dims, src_strides); } - src.SetUsrMem(src_md, &src_tensor); T alpha = 0, beta = 0; - std::shared_ptr relu_fwd_pd; - auto relu_fwd_desc = relu_forward::desc( - prop_kind::forward_training, - // Operator memory descriptor is same as user memory descriptor. 
- alg_kind, src.GetUsrMemDesc(), alpha, beta); - relu_fwd_pd.reset( - new relu_forward::primitive_desc(relu_fwd_desc, cpu_engine)); - - // allocate dst tensor + + // get a eltwise fwd from primitive pool + MklEltwiseFwdParams fwdParams(src_dims, src_md, + alg_kind, alpha, beta); + MklEltwiseFwdPrimitive *eltwise_fwd = + MklEltwiseFwdPrimitiveFactory::Get(fwdParams); + + // prepare for execuation + T* src_data = nullptr; + // check wehther src need to reorder + if (src_md.data.format != eltwise_fwd->GetSrcMemoryFormat()) { + src.SetUsrMem(src_md, &src_tensor); + std::vector net; + auto src_target_pd = memory::primitive_desc({{src_dims}, + MklDnnType(), eltwise_fwd->GetSrcMemoryFormat()}, cpu_engine); + src.CheckReorderToOpMem(src_target_pd, &net); + stream(stream::kind::eager).submit(net).wait(); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast( + const_cast(src_tensor.flat().data())); + } + + // allocate dst tensor, always set it as MKL-DNN layout + std::shared_ptr + eltwise_fwd_pd = eltwise_fwd->GetEltwiseFwdPd(); MklDnnShape dnn_shape_dst; TensorShape tf_shape_dst; if (dnn_shape_src.IsMklTensor()) { dnn_shape_dst.SetMklTensor(true); - auto dst_pd = relu_fwd_pd->dst_primitive_desc(); + auto dst_pd = eltwise_fwd_pd->dst_primitive_desc(); dnn_shape_dst.SetMklLayout(&dst_pd); dnn_shape_dst.SetElemType(MklDnnType()); dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(), dnn_shape_src.GetSizesAsMklDnnDims(), dnn_shape_src.GetTfDataFormat()); - tf_shape_dst.AddDim(dst_pd.get_size() / sizeof(T)); + tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T)); } else { + // TODO(yli135): why relu's input is TF tensor in VGG16?? 
dnn_shape_dst.SetMklTensor(false); tf_shape_dst = src_tensor.shape(); } - - // Allocate output and MklDnnShape tensors separately for possible - // in-place operation + + Tensor* dst_tensor = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {static_cast(src_index)}, - static_cast(dst_index), - tf_shape_dst, &dst_tensor)); + {src_index}, dst_index, tf_shape_dst, &dst_tensor)); AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); - // Destination memory descriptor is same as source memory descriptor. - auto &dst_md = src_md; - dst.SetUsrMem(dst_md, dst_tensor); + T* dst_data = static_cast(const_cast( + dst_tensor->flat().data())); - // execute net - std::vector net; - auto relu_fwd = - relu_forward(*relu_fwd_pd, src.GetOpMem(), dst.GetOpMem()); - net.push_back(relu_fwd); - stream(stream::kind::eager).submit(net).wait(); - } catch (mkldnn::error& e) { + // execute eltwise + eltwise_fwd->Execute(src_data, dst_data); + } catch (mkldnn::error &e) { string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + ", message: " + string(e.message) + + ", in file " + string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, + errors::Aborted("Operation received an exception:", + error_msg)); } } + + private: + engine cpu_engine = engine(engine::cpu, 0); + std::shared_ptr relu_fwd_pd; }; template @@ -472,25 +876,25 @@ class MklReluGradOpBase : public OpKernel { public: ~MklReluGradOpBase() {} - explicit MklReluGradOpBase(OpKernelConstruction* context) - : OpKernel(context) {} + explicit MklReluGradOpBase(OpKernelConstruction* context) : + OpKernel(context) { + } virtual void Compute_Scalar(OpKernelContext* context) = 0; - void Compute(OpKernelContext* context) { + void Compute(OpKernelContext* context) { try { - auto 
cpu_engine = engine(engine::cpu, 0); + // auto cpu_engine = engine(engine::cpu, 0); MklDnnData src(&cpu_engine); MklDnnData diff_dst(&cpu_engine); - MklDnnData diff_src(&cpu_engine); const size_t diff_dst_index = 0; // index of diff_dst input tensor const size_t src_index = 1; // index of src input tensor const size_t diff_src_index = 0; // index of diff_src output tensor - const Tensor& src_tensor = MklGetInput(context, src_index); + const Tensor& src_tensor = MklGetInput(context, src_index); const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index); - Tensor* diff_src_tensor = nullptr; + Tensor* diff_src_tensor = nullptr; MklDnnShape dnn_shape_src, dnn_shape_diff_dst; GetMklShape(context, src_index, &dnn_shape_src); @@ -498,37 +902,23 @@ class MklReluGradOpBase : public OpKernel { int src_dims_size = src_tensor.dims(); if (src_dims_size == 0) { - Compute_Scalar(context); // scalar case doesn't use in-place operation + Compute_Scalar(context); return; } - // Set DNN primitives for src & diff_dst + // get a eltwise bwd from primitive pool + memory::dims src_dims = {}; memory::desc src_md({}, memory::data_undef, memory::format_undef); memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef); - - // For creating Sum primitive, we need to ensure that all inputs are in - // same format. What that means is if we have a mixed input case - where - // one input is in Tensorflow format and one input is in MKL format -, - // then we need to ensure that all inputs are in same format for - // primitive construction. For performance reason, we say that all inputs - // are in MKL format in such case, and insert reorder for input that is - // in Tensorflow format into MKL format. On the other hand, if both the - // inputs are in MKL format or both are in Tensorflow format, then we - // dont need reorder. 
if (!dnn_shape_src.IsMklTensor() && !dnn_shape_diff_dst.IsMklTensor()) { - // If both the inputs are in Tensorflow format, we create blocked memory - // descriptor. - auto src_dims = TFShapeToMklDnnDims(src_tensor.shape()); + src_dims = TFShapeToMklDnnDims(src_tensor.shape()); auto src_strides = CalculateTFStrides(src_dims); src_md = MklDnnData::CreateBlockedMemDesc(src_dims, src_strides); diff_dst_md = src_md; } else if (dnn_shape_src.IsMklTensor() && !dnn_shape_diff_dst.IsMklTensor()) { - // If one input is in MKL format and other is in Tensorflow, then - // create respective descriptors describing the actual case. For input - // in Mkl format, we just get Mkl layout from MklDnnShape. For input in - // Tensorflow format, we create memory descriptor using data format. src_md = dnn_shape_src.GetMklLayout(); + src_dims = dnn_shape_src.GetSizesAsMklDnnDims(); memory::format src_mkl_data_format = dnn_shape_src.GetTfDataFormat(); auto src_tf_data_format = @@ -539,26 +929,23 @@ class MklReluGradOpBase : public OpKernel { memory::desc(diff_dst_dims, MklDnnType(), src_mkl_data_format); } else if (!dnn_shape_src.IsMklTensor() && dnn_shape_diff_dst.IsMklTensor()) { - // Same comment as above. diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); memory::format diff_dst_mkl_data_format = dnn_shape_diff_dst.GetTfDataFormat(); auto diff_dst_tf_data_format = MklDnnDataFormatToTFDataFormat(diff_dst_mkl_data_format); - auto src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), + src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), diff_dst_tf_data_format); src_md = memory::desc(src_dims, MklDnnType(), diff_dst_mkl_data_format); } else { - // If both the inputs are in MKL format, we use Mkl layout of the input - // tensors. 
src_md = dnn_shape_src.GetMklLayout(); diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); + src_dims = dnn_shape_src.GetSizesAsMklDnnDims(); } - src.SetUsrMem(src_md, &src_tensor); - diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); + T alpha = 0, beta = 0; // As per comment above, we tell MKLDNN that both the inputs are in same // format. So we set common memory descriptor in MKL format, if any of the @@ -573,83 +960,79 @@ class MklReluGradOpBase : public OpKernel { common_md = src_md; } - T alpha = 0, beta = 0; - std::shared_ptr relu_fwd_pd; - auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training, - alg_kind, src_md, alpha, beta); - relu_fwd_pd.reset( - new relu_forward::primitive_desc(relu_fwd_desc, cpu_engine)); - auto relu_bwd_desc = - relu_backward::desc(alg_kind, common_md, common_md, alpha, beta); - auto relu_bwd_pd = relu_backward::primitive_desc( - relu_bwd_desc, cpu_engine, *relu_fwd_pd); + MklEltwiseBwdParams bwdParams(src_dims, common_md, + alg_kind, alpha, beta); + MklEltwiseBwdPrimitive *eltwise_bwd = + MklEltwiseBwdPrimitiveFactory::Get(bwdParams); + auto eltwise_bwd_pd = eltwise_bwd->GetEltwiseBwdPd(); + + // check whether need reorder for src / diff_dst + T* src_data; + T* diff_dst_data; + std::vector net; + if (src_md.data.format != eltwise_bwd->GetSrcMemoryFormat()) { + src.SetUsrMem(src_md, &src_tensor); + src.CheckReorderToOpMem( + eltwise_bwd_pd.get()->diff_src_primitive_desc(), &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast( + const_cast(src_tensor.flat().data())); + } + + if (diff_dst_md.data.format != eltwise_bwd->GetDiffDstMemoryFormat()) { + diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); + diff_dst.CheckReorderToOpMem( + eltwise_bwd_pd.get()->diff_src_primitive_desc(), &net); + diff_dst_data = static_cast( + diff_dst.GetOpMem().get_data_handle()); + } else { + diff_dst_data = static_cast(const_cast( + diff_dst_tensor.flat().data())); + } + 
stream(stream::kind::eager).submit(net).wait(); // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor() || - dnn_shape_diff_dst.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor()) { + auto diff_src_pd = eltwise_bwd_pd->diff_src_primitive_desc(); dnn_shape_diff_src.SetMklTensor(true); - auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc(); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - if (dnn_shape_src.IsMklTensor()) { - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), - dnn_shape_src.GetSizesAsMklDnnDims(), - dnn_shape_src.GetTfDataFormat()); - } else { - dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(), - dnn_shape_diff_dst.GetSizesAsMklDnnDims(), - dnn_shape_diff_dst.GetTfDataFormat()); - } - tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); + dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), + dnn_shape_src.GetSizesAsMklDnnDims(), + dnn_shape_src.GetTfDataFormat()); + tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); - // both src and diff_dst are TensorFlow layout, - // so it is ok to get TensorFlow shape. tf_shape_diff_src = src_tensor.shape(); } - // Allocate diff_src and MklDnnShape tensors separately for possible - // in-place operation - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {static_cast(diff_dst_index)}, - static_cast(diff_src_index), - tf_shape_diff_src, - &diff_src_tensor)); - AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); - - // diff_src memory descriptor is same as memory descriptor for both - // inputs. 
- diff_src.SetUsrMem(common_md, diff_src_tensor); - - PrepareAndExecuteNet(relu_bwd_pd, &src, &diff_src, &diff_dst); - } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {diff_dst_index}, diff_src_index, tf_shape_diff_src, + &diff_src_tensor)); + AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); + + T* diff_src_data = static_cast(const_cast( + diff_src_tensor->flat().data())); + + // execute eltwise bwd + eltwise_bwd->Execute(src_data, diff_dst_data, diff_src_data); + } catch (mkldnn::error &e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + + ", in file " + string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, + errors::Aborted("Operation received an exception:", + error_msg)); } } - void PrepareAndExecuteNet(const relu_backward::primitive_desc& relu_prim_desc, - MklDnnData* src, MklDnnData* diff_src, - MklDnnData* diff_dst) { - std::vector net; - - // Check if we need to reorder original input tensors into common_md layout - // that we set for primitive creation. diff_src_primitive_desc is same as - // common_md. 
- src->CheckReorderToOpMem(relu_prim_desc.diff_src_primitive_desc(), &net); - diff_dst->CheckReorderToOpMem(relu_prim_desc.diff_src_primitive_desc(), - &net); - - net.push_back(relu_backward(relu_prim_desc, src->GetOpMem(), - diff_dst->GetOpMem(), diff_src->GetOpMem())); - stream(stream::kind::eager).submit(net).wait(); - } + private: + engine cpu_engine = engine(engine::cpu, 0); + std::shared_ptr relu_fwd_pd; }; template diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 230b4278ca..c4b5e124fb 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1794,11 +1794,11 @@ class MklDnnData { } }; -/// Base class for operations with reuse of DNN primitives +/// Base class for operations with reuse of primitives /// -class DnnOp { +class MklPrimitive { public: - virtual ~DnnOp() {} + virtual ~MklPrimitive() {} // Dummy data. Its size, hard-coded as 256 here, does // not matter since MKL should never operate on this buffer. @@ -1806,33 +1806,33 @@ class DnnOp { }; const mkldnn::memory::dims NONE_DIMS = {}; -// This constant is used to declare dummy buffer (size), for MKL primitives + template -class DnnOpFactory { +class MklPrimitiveFactory { public: - DnnOpFactory() {} - ~DnnOpFactory() {} + MklPrimitiveFactory() {} + ~MklPrimitiveFactory() {} - DnnOp* GetOp(const std::string& key) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); - if (stream_iter == DnnOpFactory::GetHashMap().end()) { + MklPrimitive* GetOp(const std::string& key) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { return nullptr; } else { return stream_iter->second; } } - void SetOp(const std::string& key, DnnOp* op) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); + void SetOp(const std::string& key, MklPrimitive* op) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - CHECK(stream_iter == 
DnnOpFactory::GetHashMap().end()); + CHECK(stream_iter == MklPrimitiveFactory::GetHashMap().end()); - DnnOpFactory::GetHashMap()[key] = op; + MklPrimitiveFactory::GetHashMap()[key] = op; } private: - static inline std::unordered_map &GetHashMap() { - static thread_local std::unordered_map map_; + static inline std::unordered_map &GetHashMap() { + static thread_local std::unordered_map map_; return map_; } }; -- GitLab From 2bcd873e839c66b2405226508286da371dd8afbe Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 21 May 2018 13:27:46 -0700 Subject: [PATCH 002/380] revert mkl_conv_ops.cc to avoid PR review confusion --- tensorflow/core/kernels/mkl_conv_ops.cc | 280 ++++++++++-------------- 1 file changed, 116 insertions(+), 164 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index c032add82e..f2b14f1278 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,8 +59,7 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -// This structure aggregates multiple inputs to Conv2DFwd* methods. 
-struct MklConvFwdParams { +struct ConvFwdDimensions { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -70,7 +69,7 @@ struct MklConvFwdParams { memory::dims padding_left; memory::dims padding_right; - MklConvFwdParams(memory::dims src_dims, + ConvFwdDimensions(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -83,40 +82,35 @@ struct MklConvFwdParams { }; template -class MklConv2DFwdPrimitive: public MklPrimitive { +class Conv2DFwd : public DnnOp { public: - explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { - context_.fwd_stream.reset(new stream(stream::kind::eager)); + explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { + fwd_stream_.reset(new stream(stream::kind::eager)); // create conv primitive - if (context_.conv_fwd == nullptr) { + if (conv_fwd_ == nullptr) { Setup(convFwdDims); } } - ~MklConv2DFwdPrimitive() {} + ~Conv2DFwd() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* bias_data, const T* dst_data) { - context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.bias_mem->set_data_handle( - static_cast(const_cast(bias_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); + void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + filter_mem_->set_data_handle(static_cast(filter_data)); + bias_mem_->set_data_handle(static_cast(bias_data)); + dst_mem_->set_data_handle(static_cast(dst_data)); + 
fwd_stream_->submit(fwd_primitives_); // after exec, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.bias_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + src_mem_->set_data_handle(DummyData); + filter_mem_->set_data_handle(DummyData); + bias_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); return; } @@ -125,174 +119,139 @@ class MklConv2DFwdPrimitive: public MklPrimitive { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* dst_data) { - context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); - - // after execution, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + void Execute(T* src_data, T* filter_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + filter_mem_->set_data_handle(static_cast(filter_data)); + dst_mem_->set_data_handle(static_cast(dst_data)); + fwd_stream_->submit(fwd_primitives_); - return; - } + // after exec, set data handle back + src_mem_->set_data_handle(DummyData); + filter_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); - memory::format GetSrcMemoryFormat() const { - return context_.src_fmt; + return; } - memory::format GetFilterMemoryFormat() const { - return context_.filter_fmt; - } + // expected memory format for this primitive instance + memory::format src_fmt_; + memory::format filter_fmt_; - std::shared_ptr - GetPrimitiveDesc() const { - return context_.fwd_pd; - 
} + // convolution primitive + std::shared_ptr fwd_pd_; + std::shared_ptr conv_fwd_; private: - // Primitive reuse context for Conv2D Fwd op - struct ConvFwdContext { - // expected memory format for this primitive instance - memory::format src_fmt; - memory::format filter_fmt; - - // MKLDNN memory - std::shared_ptr src_mem; - std::shared_ptr filter_mem; - std::shared_ptr bias_mem; - std::shared_ptr dst_mem; - - // desc & prmitive desc - std::shared_ptr fwd_desc; - - // memory desc - std::shared_ptr src_md; - std::shared_ptr filter_md; - std::shared_ptr bias_md; - std::shared_ptr dst_md; - - // convolution primitive - std::shared_ptr fwd_pd; - std::shared_ptr conv_fwd; - - std::shared_ptr fwd_stream; - std::vector fwd_primitives; - - ConvFwdContext() : - src_fmt(memory::format::any), filter_fmt(memory::format::any), - src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), - dst_mem(nullptr), fwd_desc(nullptr), - src_md(nullptr), filter_md(nullptr), bias_md(nullptr), - fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { - } - } context_; - - engine cpu_engine_ = engine(engine::cpu, 0); - - void Setup(const MklConvFwdParams& convFwdDims) { + void Setup(const ConvFwdDimensions& convFwdDims) { // create memory descriptors for convolution data w/ no specified format - context_.src_md.reset(new memory::desc({convFwdDims.src_dims}, + src_md_.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, + filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, + dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, + bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, MklDnnType(), memory::format::any)); // create a 
convolution if (!convFwdDims.bias_dims.empty()) { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.bias_md, *context_.dst_md, + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, - convFwdDims.padding_left, convFwdDims.padding_right, - padding_kind::zero)); + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *dst_md_, + convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, + convFwdDims.padding_right, padding_kind::zero)); } - context_.fwd_pd.reset(new convolution_forward::primitive_desc( - *context_.fwd_desc, cpu_engine_)); + fwd_pd_.reset(new convolution_forward::primitive_desc( + *fwd_desc_, cpu_engine_)); // store the expected memory format - context_.src_fmt = static_cast( - context_.fwd_pd.get()->src_primitive_desc().desc().data.format); + src_fmt_ = static_cast( + fwd_pd_.get()->src_primitive_desc().desc().data.format); - context_.filter_fmt = static_cast( - context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); + filter_fmt_ = static_cast( + fwd_pd_.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - context_.src_mem.reset(new memory( - context_.fwd_pd.get()->src_primitive_desc(), DummyData)); - context_.filter_mem.reset(new memory( - context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); - context_.dst_mem.reset(new memory( - context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); + src_mem_.reset(new 
memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); + filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), + DummyData)); + dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, - MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, - *context_.bias_mem, *context_.dst_mem)); + bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), + memory::format::x}, cpu_engine_}, DummyData)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *bias_mem_, *dst_mem_)); } else { - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, - *context_.filter_mem, *context_.dst_mem)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *dst_mem_)); } - context_.fwd_primitives.push_back(*context_.conv_fwd); + fwd_primitives_.push_back(*conv_fwd_); return; } + + // MKLDNN memory + std::shared_ptr src_mem_; + std::shared_ptr filter_mem_; + std::shared_ptr bias_mem_; + std::shared_ptr dst_mem_; + + std::shared_ptr fwd_stream_; + std::vector fwd_primitives_; + + // desc & prmitive desc + std::shared_ptr fwd_desc_; + + // memory desc + std::shared_ptr src_md_; + std::shared_ptr filter_md_; + std::shared_ptr bias_md_; + std::shared_ptr dst_md_; + + engine cpu_engine_ = engine(engine::cpu, 0); }; template -class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { +class Conv2DFwdFactory : public DnnOpFactory { public: - static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { - MklConv2DFwdPrimitive* conv2d_fwd = nullptr; + static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { + Conv2DFwd* conv2d_fwd = nullptr; // try to find a suitable one in pool - 
conv2d_fwd = dynamic_cast*> ( - MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( - convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); - MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new Conv2DFwd(convFwdDims); + Conv2DFwdFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - MklConv2DFwdPrimitiveFactory() {} - ~MklConv2DFwdPrimitiveFactory() {} + Conv2DFwdFactory() {} + ~Conv2DFwdFactory() {} static const int kDilationH = 0, kDilationW = 1; - static MklConv2DFwdPrimitiveFactory& GetInstance() { - static MklConv2DFwdPrimitiveFactory instance_; + static Conv2DFwdFactory& GetInstance() { + static Conv2DFwdFactory instance_; return instance_; } - static std::string CreateKey(const MklConvFwdParams& convFwdDims) { + static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -307,12 +266,12 @@ class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { return key_creator.GetKey(); } - MklPrimitive* GetConv2DFwd(const MklConvFwdParams& convFwdDims) { + DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { + void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -803,6 +762,7 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); + MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -852,6 +812,7 @@ class MklConv2DOp : public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? 
src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); + src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -859,28 +820,29 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); + filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - MklConv2DFwdPrimitive *conv2d_fwd = nullptr; + Conv2DFwd *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } else { - MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); + conv_fwd_pd = conv2d_fwd->fwd_pd_; AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -892,30 +854,20 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - T *src_data = nullptr; - if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { - src.SetUsrMem(src_md, &src_tensor); - src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), 
&net); - src_data = static_cast(src.GetOpMem().get_data_handle()); - } else { - src_data = static_cast(const_cast( - src_tensor.flat().data())); - } - T *filter_data = nullptr; - if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { - filter.SetUsrMem(filter_md, &filter_tensor); - filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); - filter_data = static_cast(filter.GetOpMem().get_data_handle()); - } else { - filter_data = static_cast(const_cast( - filter_tensor.flat().data())); - } - + if (src_md.data.format != conv2d_fwd->src_fmt_) + src.CheckReorderToOpMem( + conv_fwd_pd.get()->src_primitive_desc(), &net); + + if (filter_md.data.format != conv2d_fwd->filter_fmt_) + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); stream(stream::kind::eager).submit(net).wait(); + T* src_data = static_cast( + src.GetOpMem().get_data_handle()); + T* filter_data = static_cast( + filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { -- GitLab From e92ab37a625d486931cdcfa6cbd8bc32f7cd5d3c Mon Sep 17 00:00:00 2001 From: "Benjamin H. 
Myara" Date: Wed, 6 Jun 2018 02:52:21 +0300 Subject: [PATCH 003/380] Correction of MatMulStatsTest unit test --- tensorflow/python/kernel_tests/matmul_op_test.py | 4 ++-- tensorflow/python/ops/math_ops.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py index b167278984..9eaafb4435 100644 --- a/tensorflow/python/kernel_tests/matmul_op_test.py +++ b/tensorflow/python/kernel_tests/matmul_op_test.py @@ -142,7 +142,7 @@ class MatMulStatsTest(test_lib.TestCase): for op in g.get_operations(): flops = ops.get_stats_for_node_def(g, op.node_def, "flops").value if op.name == "MatMul": - self.assertEqual(7200, flops) + self.assertEqual(6975, flops) def testTransposedStatistics(self): g = ops.Graph() @@ -153,7 +153,7 @@ class MatMulStatsTest(test_lib.TestCase): for op in g.get_operations(): flops = ops.get_stats_for_node_def(g, op.node_def, "flops").value if op.name == "MatMul": - self.assertEqual(7200, flops) + self.assertEqual(6975, flops) try: diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index b7e3de7e85..aff5af530c 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2033,7 +2033,7 @@ def _calc_mat_mul_flops(graph, node): output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name) output_shape.assert_is_fully_defined() output_count = np.prod(output_shape.as_list()) - return ops.OpStats("flops", (k * output_count * 2)) + return ops.OpStats("flops", ((2 * k - 1) * output_count)) def _as_indexed_slices(x, optimize=True): -- GitLab From f369de2bb9f28c36b8b654db3dbd4dd187482c22 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 12 Jun 2018 15:54:37 -0700 Subject: [PATCH 004/380] code refactoring per Rasmus's suggestions on PR 19754 --- tensorflow/core/kernels/mkl_relu_op.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git 
a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 048d4883b2..a52c879721 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -65,7 +65,8 @@ class MklEltwiseFwdParams { template class MklEltwiseFwdPrimitive : public MklPrimitive { public: - explicit MklEltwiseFwdPrimitive(const MklEltwiseFwdParams& fwdParams) { + explicit MklEltwiseFwdPrimitive(const MklEltwiseFwdParams& fwdParams) : + cpu_engine_(engine::cpu, 0) { // store expected format context_.src_fmt = static_cast( fwdParams.src_md.data.format); @@ -90,7 +91,6 @@ class MklEltwiseFwdPrimitive : public MklPrimitive { // after execution, set data handle back context_.src_mem->set_data_handle(DummyData); context_.dst_mem->set_data_handle(DummyData); - return; } std::shared_ptr GetEltwiseFwdPd() { @@ -133,7 +133,7 @@ class MklEltwiseFwdPrimitive : public MklPrimitive { fwd_desc(nullptr), fwd_pd(nullptr), src_md(nullptr), dst_md(nullptr), src_mpd(nullptr), eltwise_fwd(nullptr), fwd_stream(nullptr) { } - } context_; + }; // Eltwise forward primitive setup void Setup(const MklEltwiseFwdParams& fwdParams) { @@ -159,10 +159,10 @@ class MklEltwiseFwdPrimitive : public MklPrimitive { *context_.src_mem, *context_.dst_mem)); context_.fwd_primitives.push_back(*context_.eltwise_fwd); - return; } - engine cpu_engine_ = engine(engine::cpu, 0); + struct EltwiseFwdContext context_; + engine cpu_engine_; }; template @@ -242,7 +242,8 @@ class MklEltwiseBwdParams { template class MklEltwiseBwdPrimitive : public MklPrimitive { public: - explicit MklEltwiseBwdPrimitive(const MklEltwiseBwdParams& bwdParams) { + explicit MklEltwiseBwdPrimitive(const MklEltwiseBwdParams& bwdParams) : + cpu_engine_(engine::cpu, 0) { context_.src_fmt = static_cast( bwdParams.common_md.data.format); context_.diff_dst_fmt = static_cast( @@ -271,7 +272,6 @@ class MklEltwiseBwdPrimitive : public MklPrimitive { context_.src_mem->set_data_handle(DummyData); 
context_.diff_dst_mem->set_data_handle(DummyData); context_.diff_src_mem->set_data_handle(DummyData); - return; } std::shared_ptr GetEltwiseBwdPd() { @@ -329,7 +329,7 @@ class MklEltwiseBwdPrimitive : public MklPrimitive { fwd_desc(nullptr), fwd_pd(nullptr), bwd_pd(nullptr), eltwise_bwd(nullptr), bwd_stream(nullptr) { } - } context_; + }; // Eltwise backward primitive setup void Setup(const MklEltwiseBwdParams& bwdParams) { @@ -365,10 +365,10 @@ class MklEltwiseBwdPrimitive : public MklPrimitive { *context_.src_mem, *context_.diff_dst_mem, *context_.diff_src_mem)); context_.bwd_primitives.push_back(*context_.eltwise_bwd); - return; } - engine cpu_engine_ = engine(engine::cpu, 0); + struct EltwiseBwdContext context_; + engine cpu_engine_; }; -- GitLab From 0059fe57ce7f6b8397b72acfb0ef30013d748116 Mon Sep 17 00:00:00 2001 From: PENGWA Date: Tue, 19 Jun 2018 20:37:58 +0800 Subject: [PATCH 005/380] consider gpu memory fraction option for memory optimizer (cherry picked from commit d7b2a4030d4b6d57f7453f986fdea346e8a76b7c) --- .../core/common_runtime/graph_execution_state.cc | 4 +++- .../core/grappler/optimizers/memory_optimizer.cc | 14 ++++++++------ .../core/grappler/optimizers/memory_optimizer.h | 3 +++ .../core/grappler/optimizers/meta_optimizer.cc | 12 +++++++----- .../core/grappler/optimizers/meta_optimizer.h | 9 ++++++++- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc index eb710bdbc5..d76f7b49b1 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.cc +++ b/tensorflow/core/common_runtime/graph_execution_state.cc @@ -407,6 +407,8 @@ Status GraphExecutionState::OptimizeGraph( const RewriterConfig& rewrite_options = session_options_->config.graph_options().rewrite_options(); + const GPUOptions& gpu_options = + session_options_->config.gpu_options(); if (grappler::MetaOptimizerEnabled(rewrite_options)) { // Adding this 
functionality in steps. The first step is to make sure @@ -493,7 +495,7 @@ Status GraphExecutionState::OptimizeGraph( grappler::VirtualCluster cluster(device_map, device_set_); GraphDef new_graph; TF_RETURN_IF_ERROR(grappler::RunMetaOptimizer( - item, rewrite_options, cpu_device, &cluster, &new_graph)); + item, rewrite_options, cpu_device, &cluster, &new_graph, gpu_options)); // Merge optimized graph function library with an original library. // Optimized graph might have new functions specialized for it's diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 1be5f8dcc2..5a2cec4358 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -943,7 +943,7 @@ struct MemInfo { static bool IdentifySwappingCandidates( Cluster* cluster, GrapplerItem* item, std::unordered_set* skip_list, - std::unordered_map* nodes_to_swap) { + std::unordered_map* nodes_to_swap, double memory_fraction) { GraphMemory memory(*item); const std::unordered_map& devices = cluster->GetDevices(); @@ -966,10 +966,10 @@ static bool IdentifySwappingCandidates( } const GraphMemory::MemoryUsage& mem_usage = memory.GetPeakMemoryUsage(name); - if (mem_usage.used_memory <= prop.memory_size()) { + if (mem_usage.used_memory <= memory_fraction * prop.memory_size()) { continue; } - int64 required_savings = mem_usage.used_memory - prop.memory_size(); + int64 required_savings = mem_usage.used_memory - memory_fraction * prop.memory_size(); std::unordered_map op_completion_times; { @@ -1105,13 +1105,14 @@ static bool IdentifySwappingCandidates( bool SwappingPass(RewriterConfig::MemOptType optimization_level, Cluster* cluster, GrapplerItem* item, - std::unordered_set* skip_list) { + std::unordered_set* skip_list, + double memory_fraction) { std::unordered_map nodes_to_swap; if (optimization_level == RewriterConfig::DEFAULT_MEM_OPT || optimization_level == 
RewriterConfig::SWAPPING_HEURISTICS || optimization_level == RewriterConfig::HEURISTICS) { // Use heuristics to figure out what needs to be swapped; - IdentifySwappingCandidates(cluster, item, skip_list, &nodes_to_swap); + IdentifySwappingCandidates(cluster, item, skip_list, &nodes_to_swap, memory_fraction); } // Look for manual annotatations in the graph. for (auto& node : *item->graph.mutable_node()) { @@ -1324,7 +1325,8 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimization_level_ == RewriterConfig::MANUAL) && cluster != nullptr) { updated_graph |= SwappingPass(optimization_level_, cluster, - &optimized_item, &skip_list); + &optimized_item, &skip_list, + per_process_gpu_memory_fraction_); } } diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.h b/tensorflow/core/grappler/optimizers/memory_optimizer.h index 653ffaec4c..6e03f442d6 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.h +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.h @@ -32,8 +32,10 @@ class MemoryOptimizer : public GraphOptimizer { // RewriterConfig::memory_optimizer_target_node_name_scope. 
explicit MemoryOptimizer( RewriterConfig::MemOptType optimization_level, + double per_process_gpu_memory_fraction = 1.0, const string& recomputation_targets_name_scope = "gradients/") : optimization_level_(optimization_level), + per_process_gpu_memory_fraction_(per_process_gpu_memory_fraction), recomputation_targets_name_scope_(recomputation_targets_name_scope) {} ~MemoryOptimizer() override {} @@ -47,6 +49,7 @@ class MemoryOptimizer : public GraphOptimizer { private: RewriterConfig::MemOptType optimization_level_; + double per_process_gpu_memory_fraction_; string recomputation_targets_name_scope_; }; diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 143d9dc1c6..e0ab7e00e9 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -83,7 +83,7 @@ std::unique_ptr MetaOptimizer::MakeNewOptimizer( MK_OPT("shape", new ShapeOptimizer()); MK_OPT("remap", new Remapper(cfg_.remapping())); MK_OPT("layout", new LayoutOptimizer()); - MK_OPT("memory", new MemoryOptimizer(RewriterConfig::MANUAL)); + MK_OPT("memory", new MemoryOptimizer(RewriterConfig::MANUAL, gpu_options_.per_process_gpu_memory_fraction())); MK_OPT("arithmetic", new ArithmeticOptimizer(cfg_.arithmetic_optimization())); MK_OPT("autoparallel", new AutoParallel(cfg_.auto_parallel().num_replicas())); MK_OPT("loop", new LoopOptimizer(cfg_.loop_optimization())); @@ -134,13 +134,14 @@ Status MetaOptimizer::InitializeOptimizers( optimizers->emplace_back(new LayoutOptimizer()); } if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { + double mem_fraction = gpu_options_.per_process_gpu_memory_fraction(); if (cfg_.memory_optimizer_target_node_name_scope().empty()) { optimizers->emplace_back( // Use the default target node name prefix "gradients/" - new MemoryOptimizer(cfg_.memory_optimization())); + new MemoryOptimizer(cfg_.memory_optimization(), mem_fraction)); } 
else { optimizers->emplace_back( - new MemoryOptimizer(cfg_.memory_optimization(), + new MemoryOptimizer(cfg_.memory_optimization(), mem_fraction, cfg_.memory_optimizer_target_node_name_scope())); } } @@ -412,8 +413,9 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, DeviceBase* cpu_device, Cluster* cluster, - GraphDef* optimized_graph) { - MetaOptimizer optimizer(cpu_device, cfg); + GraphDef* optimized_graph, + const GPUOptions& gpu_options) { + MetaOptimizer optimizer(cpu_device, cfg, gpu_options); return optimizer.Optimize(cluster, item, optimized_graph); } diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index 151a54cbdf..74b6bb7f74 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" +#include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { namespace grappler { @@ -30,6 +31,10 @@ class MetaOptimizer : public GraphOptimizer { public: MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg) : cpu_device_(cpu_device), cfg_(cfg) {} + + MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg, const GPUOptions& gpu_options) + : cpu_device_(cpu_device), cfg_(cfg), gpu_options_(gpu_options) {} + ~MetaOptimizer() override = default; string name() const override { return "meta_optimizer"; }; @@ -77,6 +82,7 @@ class MetaOptimizer : public GraphOptimizer { GraphOptimizationResult* optimization_result); std::vector optimization_results_; + GPUOptions gpu_options_; }; bool MetaOptimizerEnabled(const RewriterConfig& cfg); @@ -89,7 +95,8 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg); // when 
possible. Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, DeviceBase* cpu_device, Cluster* cluster, - GraphDef* optimized_graph); + GraphDef* optimized_graph, + const GPUOptions& gpu_options); } // namespace grappler } // namespace tensorflow -- GitLab From afbe36c5126cf118c60cbf22454d99d429425334 Mon Sep 17 00:00:00 2001 From: "Peng Wang(SIMPENG)" Date: Sat, 23 Jun 2018 06:03:41 +0000 Subject: [PATCH 006/380] Merge master change --- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 7 +++++++ tensorflow/core/grappler/optimizers/meta_optimizer.h | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index e0ab7e00e9..0d2b9a5763 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -411,6 +411,13 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { !cfg.optimizers().empty() || !cfg.custom_optimizers().empty(); } +Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, + DeviceBase* cpu_device, Cluster* cluster, + GraphDef* optimized_graph) { + MetaOptimizer optimizer(cpu_device, cfg); + return optimizer.Optimize(cluster, item, optimized_graph); +} + Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, DeviceBase* cpu_device, Cluster* cluster, GraphDef* optimized_graph, diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index 74b6bb7f74..c267b5fd8e 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -93,6 +93,10 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg); // during constant folding; if NULL, a new device is created for doing constant // folding. For performance, it is recommended to pass in an existing cpu_device // when possible. 
+Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, + DeviceBase* cpu_device, Cluster* cluster, + GraphDef* optimized_graph); + Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, DeviceBase* cpu_device, Cluster* cluster, GraphDef* optimized_graph, -- GitLab From f814e242d16997dba8b9bbded3ef6e2540e2d044 Mon Sep 17 00:00:00 2001 From: "Li, Yiqiang" Date: Sun, 15 Jul 2018 20:13:09 +0800 Subject: [PATCH 007/380] Replace to use fast reorder path in MklRelu op. --- tensorflow/core/kernels/mkl_relu_op.cc | 17 ++++++----------- tensorflow/core/util/mkl_util.h | 12 +++++++----- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index f73d3d81f9..3d5a05be73 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -814,11 +814,9 @@ class MklReluOpBase : public OpKernel { // check wehther src need to reorder if (src_md.data.format != eltwise_fwd->GetSrcMemoryFormat()) { src.SetUsrMem(src_md, &src_tensor); - std::vector net; auto src_target_pd = memory::primitive_desc({{src_dims}, MklDnnType(), eltwise_fwd->GetSrcMemoryFormat()}, cpu_engine); - src.CheckReorderToOpMem(src_target_pd, &net); - stream(stream::kind::eager).submit(net).wait(); + src.CheckReorderToOpMem(src_target_pd); src_data = static_cast(src.GetOpMem().get_data_handle()); } else { src_data = static_cast( @@ -882,9 +880,8 @@ class MklReluGradOpBase : public OpKernel { virtual void Compute_Scalar(OpKernelContext* context) = 0; - void Compute(OpKernelContext* context) { + void Compute(OpKernelContext* context) { try { - // auto cpu_engine = engine(engine::cpu, 0); MklDnnData src(&cpu_engine); MklDnnData diff_dst(&cpu_engine); @@ -892,9 +889,9 @@ class MklReluGradOpBase : public OpKernel { const size_t src_index = 1; // index of src input tensor const size_t diff_src_index = 0; // index of diff_src output tensor - const Tensor& 
src_tensor = MklGetInput(context, src_index); + const Tensor& src_tensor = MklGetInput(context, src_index); const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index); - Tensor* diff_src_tensor = nullptr; + Tensor* diff_src_tensor = nullptr; MklDnnShape dnn_shape_src, dnn_shape_diff_dst; GetMklShape(context, src_index, &dnn_shape_src); @@ -969,11 +966,10 @@ class MklReluGradOpBase : public OpKernel { // check whether need reorder for src / diff_dst T* src_data; T* diff_dst_data; - std::vector net; if (src_md.data.format != eltwise_bwd->GetSrcMemoryFormat()) { src.SetUsrMem(src_md, &src_tensor); src.CheckReorderToOpMem( - eltwise_bwd_pd.get()->diff_src_primitive_desc(), &net); + eltwise_bwd_pd.get()->diff_src_primitive_desc()); src_data = static_cast(src.GetOpMem().get_data_handle()); } else { src_data = static_cast( @@ -983,14 +979,13 @@ class MklReluGradOpBase : public OpKernel { if (diff_dst_md.data.format != eltwise_bwd->GetDiffDstMemoryFormat()) { diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); diff_dst.CheckReorderToOpMem( - eltwise_bwd_pd.get()->diff_src_primitive_desc(), &net); + eltwise_bwd_pd.get()->diff_src_primitive_desc()); diff_dst_data = static_cast( diff_dst.GetOpMem().get_data_handle()); } else { diff_dst_data = static_cast(const_cast( diff_dst_tensor.flat().data())); } - stream(stream::kind::eager).submit(net).wait(); // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index bb447e0393..b2c93a508d 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1897,8 +1897,9 @@ class MklPrimitiveFactory { ~MklPrimitiveFactory() {} MklPrimitive* GetOp(const std::string& key) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { + auto &map = MklPrimitiveFactory::GetHashMap(); + auto stream_iter = map.find(key); + if (stream_iter == map.end()) { return 
nullptr; } else { return stream_iter->second; @@ -1906,11 +1907,12 @@ class MklPrimitiveFactory { } void SetOp(const std::string& key, MklPrimitive* op) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + auto &map = MklPrimitiveFactory::GetHashMap(); + auto stream_iter = map.find(key); - CHECK(stream_iter == MklPrimitiveFactory::GetHashMap().end()); + CHECK(stream_iter == map.end()); - MklPrimitiveFactory::GetHashMap()[key] = op; + map[key] = op; } private: -- GitLab From 2fcfb4abde9d847cff5a344cf06b2704cb6f9545 Mon Sep 17 00:00:00 2001 From: "Peng Wang (SIMPENG)" Date: Fri, 20 Jul 2018 16:25:56 +0800 Subject: [PATCH 008/380] fix build error --- tensorflow/core/grappler/optimizers/memory_optimizer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc index a3f0e07861..49543645f6 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc @@ -114,7 +114,7 @@ TEST_F(RecomputeSubgraphTest, TwoInputSubgraphs) { (*pre_transform_node_map.GetNode("b")->mutable_attr())["_recompute_hint"] .set_i(0); - MemoryOptimizer optimizer(RewriterConfig::MANUAL, + MemoryOptimizer optimizer(RewriterConfig::MANUAL,1.0, "some_name_scope/gradients"); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); -- GitLab From d76aaad2ea9ee4df8c32b382db758854315d230e Mon Sep 17 00:00:00 2001 From: "Peng Wang (SIMPENG)" Date: Fri, 20 Jul 2018 17:50:51 +0800 Subject: [PATCH 009/380] change format a bit --- tensorflow/core/grappler/optimizers/memory_optimizer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc index 49543645f6..1473e26cbd 100644 --- 
a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc @@ -114,7 +114,7 @@ TEST_F(RecomputeSubgraphTest, TwoInputSubgraphs) { (*pre_transform_node_map.GetNode("b")->mutable_attr())["_recompute_hint"] .set_i(0); - MemoryOptimizer optimizer(RewriterConfig::MANUAL,1.0, + MemoryOptimizer optimizer(RewriterConfig::MANUAL, 1.0, "some_name_scope/gradients"); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); -- GitLab From 5e07ab3ff51b06c3291789ac0e65499217f720d1 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrukhin Date: Fri, 27 Jul 2018 07:32:47 -0700 Subject: [PATCH 010/380] Added a normalization term to ctc_beam_search_decoder in order to get correct log probabilities. It solves https://github.com/tensorflow/tensorflow/issues/6034 --- tensorflow/core/util/ctc/ctc_beam_search.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/ctc/ctc_beam_search.h b/tensorflow/core/util/ctc/ctc_beam_search.h index 709c65fc96..fa58b5cedf 100644 --- a/tensorflow/core/util/ctc/ctc_beam_search.h +++ b/tensorflow/core/util/ctc/ctc_beam_search.h @@ -259,6 +259,16 @@ void CTCBeamSearchDecoder::Step( } else { max_coeff = raw_input.maxCoeff(); } + + // Get normalization term of softmax: log(sum(exp(logit[j]-max_coeff))). + float logsumexp = 0.0; + for (int j = 0; j < raw_input.size(); ++j) { + logsumexp += expf(raw_input(j) - max_coeff); + } + logsumexp = logf(logsumexp); + // Final normalization offset to get correct log probabilities. + float norm_offset = max_coeff + logsumexp; + const float label_selection_input_min = (label_selection_margin_ >= 0) ? 
(max_coeff - label_selection_margin_) : -std::numeric_limits::infinity(); @@ -290,10 +300,10 @@ void CTCBeamSearchDecoder::Step( beam_scorer_->GetStateExpansionScore(b->state, previous)); } // Plabel(l=abc @ t=6) *= P(c @ 6) - b->newp.label += raw_input(b->label) - max_coeff; + b->newp.label += raw_input(b->label) - norm_offset; } // Pblank(l=abc @ t=6) = P(l=abc @ t=5) * P(- @ 6) - b->newp.blank = b->oldp.total + raw_input(blank_index_) - max_coeff; + b->newp.blank = b->oldp.total + raw_input(blank_index_) - norm_offset; // P(l=abc @ t=6) = Plabel(l=abc @ t=6) + Pblank(l=abc @ t=6) b->newp.total = LogSumExp(b->newp.blank, b->newp.label); @@ -328,6 +338,8 @@ void CTCBeamSearchDecoder::Step( const float logit = top_k ? top_k_logits[ind] : raw_input(ind); // Perform label selection: if input for this label looks very // unpromising, never evaluate it with a scorer. + // We may compare logits instead of log probabilities, + // since the difference is the same in both cases. if (logit < label_selection_input_min) { continue; } @@ -341,7 +353,7 @@ void CTCBeamSearchDecoder::Step( // Plabel(l=abcd @ t=6) = P(l=abc @ t=5) * P(d @ 6) beam_scorer_->ExpandState(b->state, b->label, &c.state, c.label); float previous = (c.label == b->label) ? 
b->oldp.blank : b->oldp.total; - c.newp.label = logit - max_coeff + + c.newp.label = logit - norm_offset + beam_scorer_->GetStateExpansionScore(c.state, previous); // P(l=abcd @ t=6) = Plabel(l=abcd @ t=6) c.newp.total = c.newp.label; -- GitLab From 6d76eaaa751cc56a68dcf6e39c5d72191b9be26e Mon Sep 17 00:00:00 2001 From: Vitaly Lavrukhin Date: Fri, 27 Jul 2018 17:16:05 -0700 Subject: [PATCH 011/380] Fix log probabilities in test --- .../kernel_tests/ctc_decoder_ops_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py b/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py index e1920eb568..41ae0b456f 100644 --- a/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py +++ b/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py @@ -188,11 +188,11 @@ class CTCGreedyDecoderTest(test.TestCase): ], dtype=np.float32) # Add arbitrary offset - this is fine - input_log_prob_matrix_0 = np.log(input_prob_matrix_0) + 2.0 + input_prob_matrix_0 = input_prob_matrix_0 + 2.0 # len max_time_steps array of batch_size x depth matrices inputs = ([ - input_log_prob_matrix_0[t, :][np.newaxis, :] for t in range(seq_len_0) + input_prob_matrix_0[t, :][np.newaxis, :] for t in range(seq_len_0) ] # Pad to max_time_steps = 8 + 2 * [np.zeros( (1, depth), dtype=np.float32)]) @@ -200,11 +200,11 @@ class CTCGreedyDecoderTest(test.TestCase): # batch_size length vector of sequence_lengths seq_lens = np.array([seq_len_0], dtype=np.int32) - # batch_size length vector of negative log probabilities + # batch_size length vector of log probabilities log_prob_truth = np.array( [ - 0.584855, # output beam 0 - 0.389139 # output beam 1 + -5.811451, # output beam 0 + -6.63339 # output beam 1 ], np.float32)[np.newaxis, :] @@ -215,11 +215,11 @@ class CTCGreedyDecoderTest(test.TestCase): [[0, 0], [0, 1]], dtype=np.int64), np.array( [1, 0], dtype=np.int64), np.array( [1, 2], dtype=np.int64)), - # beam 1, batch 0, three 
outputs decoded + # beam 1, batch 0, one output decoded (np.array( - [[0, 0], [0, 1], [0, 2]], dtype=np.int64), np.array( - [0, 1, 0], dtype=np.int64), np.array( - [1, 3], dtype=np.int64)), + [[0, 0]], dtype=np.int64), np.array( + [1], dtype=np.int64), np.array( + [1, 1], dtype=np.int64)), ] # Test correct decoding. -- GitLab From 35c81bb208622589abaebccb35c44da9148e2d14 Mon Sep 17 00:00:00 2001 From: Stefan Dyulgerov Date: Sun, 22 Jul 2018 17:07:34 +0300 Subject: [PATCH 012/380] ignore cmake build artifacts --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5afe375f46..4e526261c7 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ __pycache__ *.swp .vscode/ cmake_build/ +tensorflow/contrib/cmake/_build/ .idea/** /build/ [Bb]uild/ -- GitLab From 171b34a519ea2c888d0f9fd754ca8a8c5ed02587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sun, 29 Jul 2018 09:21:29 +0800 Subject: [PATCH 013/380] PREP: use np.array to avoid copy behavior of index tensor --- tensorflow/python/ops/array_grad.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index a2b5f77f91..d709f6b36b 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from math import ceil +import numpy as np from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context @@ -774,17 +775,25 @@ def _ExtractImagePatchesGrad(op, grad): row_steps = range(0, rows_out * stride_r, stride_r) col_steps = range(0, cols_out * stride_h, stride_h) - idx = [] + idx = np.zeros((rows_out * cols_out * ksize_r * ksize_c, 2), + dtype=np.int64) + idx_len = 0 for i in range(rows_out): + r_low = row_steps[i] - pad_rows + r_high = r_low + ksize_r_eff + for j in 
range(cols_out): - r_low, c_low = row_steps[i] - pad_rows, col_steps[j] - pad_cols - r_high, c_high = r_low + ksize_r_eff, c_low + ksize_c_eff - - idx.extend([(r * (cols_in) + c, i * (cols_out * ksize_r * ksize_c) + j * - (ksize_r * ksize_c) + ri * (ksize_c) + ci) - for (ri, r) in enumerate(range(r_low, r_high, rate_r)) - for (ci, c) in enumerate(range(c_low, c_high, rate_c)) - if 0 <= r and r < rows_in and 0 <= c and c < cols_in]) + c_low = col_steps[j] - pad_cols + c_high = c_low + ksize_c_eff + + for (ri, r) in enumerate(range(r_low, r_high, rate_r)): + for (ci, c) in enumerate(range(c_low, c_high, rate_c)): + if 0 <= r and r < rows_in and 0 <= c and c < cols_in: + idx[idx_len][0] = r * (cols_in) + c + idx[idx_len][1] = (i * (cols_out * ksize_r * ksize_c) + + j * (ksize_r * ksize_c) + ri * (ksize_c) + ci) + idx_len += 1 + idx = idx[:idx_len] sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c) -- GitLab From 8e761899a7a8102334fc688b6b0fb69a23e93f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sun, 29 Jul 2018 17:11:45 +0800 Subject: [PATCH 014/380] PREP: faster method for construction idx array --- tensorflow/python/ops/array_grad.py | 86 ++++++++++++----------------- 1 file changed, 36 insertions(+), 50 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index d709f6b36b..4578639649 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -735,7 +735,6 @@ def _QuantizeAndDequantizeV3Grad(_, grad): @ops.RegisterGradient("ExtractImagePatches") def _ExtractImagePatchesGrad(op, grad): - batch_size, rows_in, cols_in, channels = [ dim.value for dim in op.inputs[0].get_shape() ] @@ -743,28 +742,44 @@ def _ExtractImagePatchesGrad(op, grad): batch_size = input_bhwc[0] channels = input_bhwc[3] + # Create indices matrix for input tensor. 
+ # Note that 0 is preserved for padding location, + # so indice for input starts from 1 to 1 + rows_in * cols_in. + input_indices_num = 1 + rows_in * cols_in + input_idx = array_ops.reshape(math_ops.range(1, input_indices_num, + dtype=ops.dtypes.int64), + (1, rows_in, cols_in, 1)) + input_idx_patched = gen_array_ops.extract_image_patches( + input_idx, + op.get_attr("ksizes"), + op.get_attr("strides"), + op.get_attr("rates"), + op.get_attr("padding")) + + # Create indices matrix for output tensor. _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].get_shape()] _, ksize_r, ksize_c, _ = op.get_attr("ksizes") - _, stride_r, stride_h, _ = op.get_attr("strides") - _, rate_r, rate_c, _ = op.get_attr("rates") - padding = op.get_attr("padding") - - ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1) - ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1) - - if padding == b"SAME": - rows_out = int(ceil(rows_in / stride_r)) - cols_out = int(ceil(cols_in / stride_h)) - pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2 - pad_cols = ((cols_out - 1) * stride_h + ksize_c_eff - cols_in) // 2 - - elif padding == b"VALID": - rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r)) - cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_h)) - pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in - pad_cols = (cols_out - 1) * stride_h + ksize_c_eff - cols_in - - pad_rows, pad_cols = max(0, pad_rows), max(0, pad_cols) + # Indice for output starts from 0. + output_indices_num = rows_out * cols_out * ksize_r * ksize_c + output_idx = array_ops.reshape(math_ops.range(output_indices_num, + dtype=ops.dtypes.int64), + (1, rows_out, cols_out, ksize_r * ksize_c)) + + # Construct mapping table for indices: input -> output. 
+ idx_matrix = array_ops.concat([array_ops.expand_dims(input_idx_patched, axis=-1), + array_ops.expand_dims(output_idx, axis=-1)], + axis=-1) + idx_map = array_ops.reshape(idx_matrix, (-1, 2)) + + sp_shape = (input_indices_num, output_indices_num) + sp_mat = sparse_tensor.SparseTensor( + idx_map, + array_ops.ones_like(idx_map[:, 0], dtype=grad.dtype), + sp_shape) + # Remove all padding locations: [0, :]. + sp_mat = sparse_ops.sparse_slice(sp_mat, + (1, 0), + (input_indices_num - 1, output_indices_num)) grad_expanded = array_ops.transpose( array_ops.reshape( @@ -772,35 +787,6 @@ def _ExtractImagePatchesGrad(op, grad): (1, 2, 3, 4, 0, 5)) grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels)) - row_steps = range(0, rows_out * stride_r, stride_r) - col_steps = range(0, cols_out * stride_h, stride_h) - - idx = np.zeros((rows_out * cols_out * ksize_r * ksize_c, 2), - dtype=np.int64) - idx_len = 0 - for i in range(rows_out): - r_low = row_steps[i] - pad_rows - r_high = r_low + ksize_r_eff - - for j in range(cols_out): - c_low = col_steps[j] - pad_cols - c_high = c_low + ksize_c_eff - - for (ri, r) in enumerate(range(r_low, r_high, rate_r)): - for (ci, c) in enumerate(range(c_low, c_high, rate_c)): - if 0 <= r and r < rows_in and 0 <= c and c < cols_in: - idx[idx_len][0] = r * (cols_in) + c - idx[idx_len][1] = (i * (cols_out * ksize_r * ksize_c) + - j * (ksize_r * ksize_c) + ri * (ksize_c) + ci) - idx_len += 1 - idx = idx[:idx_len] - - sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c) - - sp_mat = sparse_tensor.SparseTensor( - array_ops.constant(idx, dtype=ops.dtypes.int64), - array_ops.ones((len(idx),), dtype=grad.dtype), sp_shape) - jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat) grad_out = array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels)) -- GitLab From c22b5c678a42474fbc9aab59345ac09eeb685c37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sun, 29 
Jul 2018 17:20:45 +0800 Subject: [PATCH 015/380] CLN: remove unused import --- tensorflow/python/ops/array_grad.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 4578639649..33c960e0dc 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -18,9 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from math import ceil -import numpy as np - from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.framework import constant_op -- GitLab From 0d49774a0487b26737b950b510605833671775d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sun, 29 Jul 2018 17:23:23 +0800 Subject: [PATCH 016/380] CLN: typo: indices --- tensorflow/python/ops/array_grad.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 33c960e0dc..b6f03144b1 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -741,7 +741,7 @@ def _ExtractImagePatchesGrad(op, grad): # Create indices matrix for input tensor. # Note that 0 is preserved for padding location, - # so indice for input starts from 1 to 1 + rows_in * cols_in. + # so indices for input start from 1 to 1 + rows_in * cols_in. input_indices_num = 1 + rows_in * cols_in input_idx = array_ops.reshape(math_ops.range(1, input_indices_num, dtype=ops.dtypes.int64), @@ -756,7 +756,7 @@ def _ExtractImagePatchesGrad(op, grad): # Create indices matrix for output tensor. _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].get_shape()] _, ksize_r, ksize_c, _ = op.get_attr("ksizes") - # Indice for output starts from 0. + # Indices for output start from 0. 
output_indices_num = rows_out * cols_out * ksize_r * ksize_c output_idx = array_ops.reshape(math_ops.range(output_indices_num, dtype=ops.dtypes.int64), -- GitLab From 4f456bc6f19d667a6d32a7459742b3139e8fe617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sun, 29 Jul 2018 22:52:44 +0800 Subject: [PATCH 017/380] CLN: clean codes --- tensorflow/python/ops/array_grad.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index b6f03144b1..328b4f7d53 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -762,19 +762,19 @@ def _ExtractImagePatchesGrad(op, grad): dtype=ops.dtypes.int64), (1, rows_out, cols_out, ksize_r * ksize_c)) - # Construct mapping table for indices: input -> output. + # Construct mapping table for indices: (input -> output). idx_matrix = array_ops.concat([array_ops.expand_dims(input_idx_patched, axis=-1), array_ops.expand_dims(output_idx, axis=-1)], axis=-1) idx_map = array_ops.reshape(idx_matrix, (-1, 2)) sp_shape = (input_indices_num, output_indices_num) - sp_mat = sparse_tensor.SparseTensor( + sp_mat_full = sparse_tensor.SparseTensor( idx_map, array_ops.ones_like(idx_map[:, 0], dtype=grad.dtype), sp_shape) - # Remove all padding locations: [0, :]. - sp_mat = sparse_ops.sparse_slice(sp_mat, + # Remove all padding locations [0, :]. 
+ sp_mat = sparse_ops.sparse_slice(sp_mat_full, (1, 0), (input_indices_num - 1, output_indices_num)) -- GitLab From e6ae2664c5f72f09c9a6d102a89963c4a9bbf8f1 Mon Sep 17 00:00:00 2001 From: Johannes Schmitz Date: Tue, 31 Jul 2018 20:05:29 +0200 Subject: [PATCH 018/380] Improve readability of Tensor::CheckType error output --- tensorflow/core/framework/tensor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 384a42fc11..2e5426712b 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -613,13 +613,13 @@ bool Tensor::IsInitialized() const { } void Tensor::CheckType(DataType expected_dtype) const { - CHECK_EQ(dtype(), expected_dtype) + CHECK_EQ(dtype(), expected_dtype) << " " << DataTypeString(expected_dtype) << " expected, got " << DataTypeString(dtype()); } void Tensor::CheckTypeAndIsAligned(DataType expected_dtype) const { - CHECK_EQ(dtype(), expected_dtype) + CHECK_EQ(dtype(), expected_dtype) << " " << DataTypeString(expected_dtype) << " expected, got " << DataTypeString(dtype()); CHECK(IsAligned()) << "ptr = " << base(); -- GitLab From 94e0c6bb67b82eb1a43135eb5edff6c6fe4ab638 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 18 Jul 2018 16:59:07 -0700 Subject: [PATCH 019/380] Add new Dockerfile assembler based on partials This change adds a new suite of TensorFlow dockerfiles. The dockerfiles come from an assembler controlled by a yaml spec, and are based on a set of re-usable partial dockerfiles. The assembler and spec include conveniences like spec validation, references to other images and specs for minimizing repetition, and arg expansion. 
--- tensorflow/tools/docker/README.md | 7 + tensorflow/tools/dockerfiles/Dockerfile | 11 + tensorflow/tools/dockerfiles/README.md | 38 ++ tensorflow/tools/dockerfiles/assembler.py | 528 ++++++++++++++++++ tensorflow/tools/dockerfiles/bashrc | 33 ++ .../dockerfiles/cpu-devel-jupyter.Dockerfile | 85 +++ .../dockerfiles/cpu-devel.Dockerfile | 74 +++ .../dockerfiles/cpu-jupyter.Dockerfile | 54 ++ .../dockerfiles/dockerfiles/cpu.Dockerfile | 43 ++ .../nvidia-devel-jupyter.Dockerfile | 105 ++++ .../dockerfiles/nvidia-devel.Dockerfile | 94 ++++ .../dockerfiles/nvidia-jupyter.Dockerfile | 75 +++ .../dockerfiles/dockerfiles/nvidia.Dockerfile | 64 +++ .../partials/bazel.partial.Dockerfile | 13 + .../partials/jupyter.partial.Dockerfile | 8 + .../partials/nvidia-devel.partial.Dockerfile | 43 ++ .../partials/nvidia.partial.Dockerfile | 23 + .../partials/python.partial.Dockerfile | 12 + .../partials/shell.partial.Dockerfile | 2 + .../partials/tensorflow.partial.Dockerfile | 2 + .../partials/ubuntu-devel.partial.Dockerfile | 24 + .../partials/ubuntu.partial.Dockerfile | 2 + tensorflow/tools/dockerfiles/spec.yml | 177 ++++++ 23 files changed, 1517 insertions(+) create mode 100644 tensorflow/tools/dockerfiles/Dockerfile create mode 100644 tensorflow/tools/dockerfiles/README.md create mode 100644 tensorflow/tools/dockerfiles/assembler.py create mode 100644 tensorflow/tools/dockerfiles/bashrc create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/cpu-devel-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/cpu-devel.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/nvidia-jupyter.Dockerfile create mode 
100644 tensorflow/tools/dockerfiles/dockerfiles/nvidia.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/bazel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/nvidia-devel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/nvidia.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/python.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/shell.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/ubuntu-devel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/ubuntu.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/spec.yml diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index 525f2995ce..41b8ffdf72 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -1,3 +1,10 @@ +# WARNING: THESE IMAGES ARE DEPRECATED. + +TensorFlow's Dockerfiles are now located in +[`tensorflow/tools/dockerfiles/`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/dockerfiles). + +This directory will eventually be removed. 
+ # Using TensorFlow via Docker This directory contains `Dockerfile`s to make it easy to get up and running with diff --git a/tensorflow/tools/dockerfiles/Dockerfile b/tensorflow/tools/dockerfiles/Dockerfile new file mode 100644 index 0000000000..e8ca012298 --- /dev/null +++ b/tensorflow/tools/dockerfiles/Dockerfile @@ -0,0 +1,11 @@ +FROM hadolint/hadolint:latest-debian +LABEL maintainer="Austin Anderson " + +RUN apt-get update && apt-get install -y python3 python3-pip bash +RUN pip3 install --upgrade pip setuptools pyyaml absl-py cerberus + +WORKDIR /tf +VOLUME ["/tf"] + +COPY bashrc /etc/bash.bashrc +RUN chmod 777 /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md new file mode 100644 index 0000000000..1fe51adb4a --- /dev/null +++ b/tensorflow/tools/dockerfiles/README.md @@ -0,0 +1,38 @@ +# TensorFlow Dockerfiles + +This directory houses TensorFlow's Dockerfiles. **DO NOT EDIT THE DOCKERFILES +MANUALLY!** They are maintained by `assembler.py`, which builds Dockerfiles from +the files in `partials/` and the rules in `spec.yml`. See [the Maintaining +section](#maintaining) for more information. + +## Building + +The Dockerfiles in the `dockerfiles` directory must have their build context set +to **the directory with this README.md** to copy in helper files. For example: + +```bash +$ docker build -f ./dockerfiles/cpu.Dockerfile -t tf-cpu . +``` + +Each Dockerfile has its own set of available `--build-arg`s which are documented +in the Dockerfile itself. + +## Maintaining + +To make changes to TensorFlow's Dockerfiles, you'll update `spec.yml` and the +`*.partial.Dockerfile` files in the `partials` directory, then run +`assembler.py` to re-generate the full Dockerfiles before creating a pull +request. + +You can use the `Dockerfile` in this directory to build an editing environment +that has all of the Python dependencies you'll need: + +```bash +$ docker build -t tf-assembler . 
+ +# Set --user to set correct permissions on generated files +$ docker run --user $(id -u):$(id -g) -it -v $(pwd):/tf tf-assembler bash + +# In the container... +/tf $ python3 ./assembler.py -o dockerfiles -s spec.yml --validate +``` diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py new file mode 100644 index 0000000000..a33c42ace6 --- /dev/null +++ b/tensorflow/tools/dockerfiles/assembler.py @@ -0,0 +1,528 @@ +"""Assemble common TF Dockerfiles from many parts. + +TODO(angerson): DO NOT SUBMIT without a detailed description of assembler. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import errno +import os +import os.path +import re +import shutil +import textwrap + +from absl import app +from absl import flags +import cerberus +import yaml + +FLAGS = flags.FLAGS + +flags.DEFINE_boolean( + 'dry_run', False, 'Do not actually generate Dockerfiles', short_name='n') + +flags.DEFINE_string( + 'spec_file', + './spec.yml', + 'Path to a YAML specification file', + short_name='s') + +flags.DEFINE_string( + 'output_dir', + '.', ('Path to an output directory for Dockerfiles. ' + 'Will be created if it doesn\'t exist.'), + short_name='o') + +flags.DEFINE_string( + 'partial_dir', + './partials', + 'Path to a directory containing foo.partial.Dockerfile partial files.', + short_name='p') + +flags.DEFINE_boolean( + 'quiet_dry_run', + True, + 'Do not print contents of dry run Dockerfiles.', + short_name='q') + +flags.DEFINE_boolean( + 'validate', True, 'Validate generated Dockerfiles', short_name='c') + +# Schema to verify the contents of spec.yml with Cerberus. +# Must be converted to a dict from yaml to work. +# Note: can add python references with e.g. 
+# !!python/name:builtins.str +# !!python/name:__main__.funcname +SCHEMA_TEXT = """ +header: + type: string + +partials: + type: dict + keyschema: + type: string + valueschema: + type: dict + schema: + desc: + type: string + args: + type: dict + keyschema: + type: string + valueschema: + anyof: + - type: [ boolean, number, string ] + - type: dict + schema: + default: + type: [ boolean, number, string ] + desc: + type: string + options: + type: list + schema: + type: string + +images: + keyschema: + type: string + valueschema: + type: dict + schema: + desc: + type: string + arg-defaults: + type: list + schema: + anyof: + - type: dict + keyschema: + type: string + arg_in_use: true + valueschema: + type: string + - type: string + isimage: true + create-dockerfile: + type: boolean + partials: + type: list + schema: + anyof: + - type: dict + keyschema: + type: string + regex: image + valueschema: + type: string + isimage: true + - type: string + ispartial: true +""" + + +class TfDockerValidator(cerberus.Validator): + """Custom Cerberus validator for TF dockerfile spec. + + Note that each custom validator's docstring must end with a segment describing + its own validation schema. + """ + + def _validate_ispartial(self, ispartial, field, value): + """Validate that a partial references an existing partial spec. + + Args: + ispartial: Value of the rule, a bool + field: The field being validated + value: The field's value + + The rule's arguments are validated against this schema: + {'type': 'boolean'} + """ + if ispartial and value not in self.root_document.get('partials', dict()): + self._error(field, '{} is not an existing partial.'.format(value)) + + def _validate_isimage(self, isimage, field, value): + """Validate that an image references an existing partial spec. 
+ + Args: + isimage: Value of the rule, a bool + field: The field being validated + value: The field's value + + The rule's arguments are validated against this schema: + {'type': 'boolean'} + """ + if isimage and value not in self.root_document.get('images', dict()): + self._error(field, '{} is not an existing image.'.format(value)) + + def _validate_arg_in_use(self, arg_in_use, field, value): + """Validate that an arg references an existing partial spec's args. + + Args: + arg_in_use: Value of the rule, a bool + field: The field being validated + value: The field's value + + The rule's arguments are validated against this schema: + {'type': 'boolean'} + """ + if arg_in_use: + for partial in self.root_document.get('partials', dict()).values(): + if value in partial.get('args', tuple()): + return + self._error(field, '{} is not an arg used in any partial.'.format(value)) + + +def build_partial_description(partial_spec): + """Create the documentation lines for a specific partial. + + Generates something like this: + + # This is the partial's description, from spec.yml. + # --build-arg ARG_NAME=argdefault + # this is one of the args. + # --build-arg ANOTHER_ARG=(some|choices) + # another arg. + + Args: + partial_spec: A dict representing one of the partials from spec.yml. Doesn't + include the name of the partial; is a dict like { desc: ..., args: ... }. + + Returns: + A commented string describing this partial. 
+ """ + + # Start from linewrapped desc field + lines = [] + wrapper = textwrap.TextWrapper( + initial_indent='# ', subsequent_indent='# ', width=80) + description = wrapper.fill(partial_spec.get('desc', '( no comments )')) + lines.extend(['#', description]) + + # Document each arg + for arg, arg_data in partial_spec.get('args', dict()).items(): + + # Wrap arg description with comment lines + desc = arg_data.get('desc', '( no description )') + desc = textwrap.fill( + desc, + initial_indent='# ', + subsequent_indent='# ', + width=80, + drop_whitespace=False) + + # Document (each|option|like|this) + if 'options' in arg_data: + arg_options = ' ({})'.format('|'.join(arg_data['options'])) + else: + arg_options = '' + + # Add usage sample + arg_use = '# --build-arg {}={}{}'.format(arg, + arg_data.get('default', '(unset)'), + arg_options) + lines.extend([arg_use, desc]) + return '\n'.join(lines) + + +def construct_contents(partial_specs, image_spec): + """Assemble the dockerfile contents for an image spec. + + It assembles a concrete list of partial references into a single, large + string. + Also expands argument defaults, so that the resulting Dockerfile doesn't have + to be configured with --build-arg=... every time. That is, any ARG directive + will be updated with a new default value. + + Args: + partial_specs: The dict from spec.yml["partials"]. + image_spec: One of the dict values from spec.yml["images"]. + + Returns: + A string containing a valid Dockerfile based on the partials listed in + image_spec. 
+ """ + processed_partial_strings = [] + for partial_name in image_spec['partials']: + + # Apply image arg-defaults to existing arg defaults + partial_spec = copy.deepcopy(partial_specs[partial_name]) + args = partial_spec.get('args', dict()) + for k_v in image_spec.get('arg-defaults', []): + arg, value = list(k_v.items())[0] + if arg in args: + args[arg]['default'] = value + + # Read partial file contents + filename = partial_spec.get('file', partial_name) + partial_path = os.path.join(FLAGS.partial_dir, + '{}.partial.Dockerfile'.format(filename)) + with open(partial_path, 'r') as f_partial: + partial_contents = f_partial.read() + + # Replace ARG FOO=BAR with ARG FOO=[new-default] + for arg, arg_data in args.items(): + if 'default' in arg_data and arg_data['default']: + default = '={}'.format(arg_data['default']) + else: + default = '' + partial_contents = re.sub(r'ARG {}.*'.format(arg), 'ARG {}{}'.format( + arg, default), partial_contents) + processed_partial_strings.append(partial_contents) + return '\n'.join(processed_partial_strings) + + +# Create a directory and its parents, even if it already exists +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def construct_documentation(header, partial_specs, image_spec): + """Assemble all of the documentation for a single dockerfile. + + Builds explanations of included partials and available build args. + + Args: + header: The string from spec.yml["header"]; will be commented and wrapped. + partial_specs: The dict from spec.yml["partials"]. + image_spec: The spec for the dockerfile being built. + + Returns: + A string containing a commented header that documents the contents of the + dockerfile. 
+ + """ + # Comment and wrap header and image description + commented_header = '\n'.join(['# ' + l for l in header.splitlines()]) + commented_desc = '\n'.join( + ['# ' + l for l in image_spec.get('desc', '').splitlines()]) + partial_descriptions = [] + + # Build documentation for each partial in the image + for partial in image_spec['partials']: + + # Copy partial data for default args unique to this image + partial_spec = copy.deepcopy(partial_specs[partial]) + args = partial_spec.get('args', dict()) + + # Overwrite any existing arg defaults + for k_v in image_spec.get('arg-defaults', []): + arg, value = list(k_v.items())[0] + if arg in args: + args[arg]['default'] = value + + # Build the description from new args + partial_description = build_partial_description(partial_spec) + partial_descriptions.append(partial_description) + + contents = [commented_header, '#', commented_desc] + partial_descriptions + return '\n'.join(contents) + '\n' + + +def normalize_partial_args(partial_specs): + """Normalize the shorthand form of a partial's args specification. + + Turns this: + + partial: + args: + SOME_ARG: arg_value + + Into this: + + partial: + args: + SOME_ARG: + default: arg_value + + Args: + partial_specs: The dict from spec.yml["partials"]. This dict is modified in + place. + + Returns: + The modified contents of partial_specs. + + """ + for _, partial in partial_specs.items(): + args = partial.get('args', dict()) + for arg, value in args.items(): + if not isinstance(value, dict): + new_value = {'default': value} + args[arg] = new_value + return partial_specs + + +def flatten_args_references(image_specs): + """Resolve all default-args in each image spec to a concrete dict. 
+ + Turns this: + + example-image: + arg-defaults: + - MY_ARG: ARG_VALUE + + another-example: + arg-defaults: + - ANOTHER_ARG: ANOTHER_VALUE + - example_image + + Into this: + + example-image: + arg-defaults: + - MY_ARG: ARG_VALUE + + another-example: + arg-defaults: + - ANOTHER_ARG: ANOTHER_VALUE + - MY_ARG: ARG_VALUE + + Args: + image_specs: A dict of image_spec dicts; should be the contents of the + "images" key in the global spec.yaml. This dict is modified in place and + then returned. + + Returns: + The modified contents of image_specs. + """ + for _, image_spec in image_specs.items(): + too_deep = 0 + while str in map(type, image_spec.get('arg-defaults', [])) and too_deep < 5: + new_args = [] + for arg in image_spec['arg-defaults']: + if isinstance(arg, str): + new_args.extend(image_specs[arg]['arg-defaults']) + else: + new_args.append(arg) + image_spec['arg-defaults'] = new_args + too_deep += 1 + return image_specs + + +def flatten_partial_references(image_specs): + """Resolve all partial references in each image spec to a concrete list. + + Turns this: + + example-image: + partials: + - foo + + another-example: + partials: + - bar + - image: example-image + - bat + + Into this: + + example-image: + partials: + - foo + + another-example: + partials: + - bar + - foo + - bat + Args: + image_specs: A dict of image_spec dicts; should be the contents of the + "images" key in the global spec.yaml. This dict is modified in place and + then returned. + + Returns: + The modified contents of image_specs. 
+ """ + for _, image_spec in image_specs.items(): + too_deep = 0 + while dict in map(type, image_spec['partials']) and too_deep < 5: + new_partials = [] + for partial in image_spec['partials']: + if isinstance(partial, str): + new_partials.append(partial) + else: + new_partials.extend(image_specs[partial['image']]['partials']) + image_spec['partials'] = new_partials + too_deep += 1 + return image_specs + + +def construct_dockerfiles(tf_spec): + """Generate a mapping of {"cpu": , ...}. + + Args: + tf_spec: The full spec.yml loaded as a python object. + + Returns: + A string:string dict of short names ("cpu-devel") to Dockerfile contents. + """ + names_to_contents = dict() + image_specs = tf_spec['images'] + image_specs = flatten_partial_references(image_specs) + image_specs = flatten_args_references(image_specs) + partial_specs = tf_spec['partials'] + partial_specs = normalize_partial_args(partial_specs) + + for name, image_spec in image_specs.items(): + if not image_spec.get('create-dockerfile', True): + continue + documentation = construct_documentation(tf_spec['header'], partial_specs, + image_spec) + contents = construct_contents(partial_specs, image_spec) + names_to_contents[name] = '\n'.join([documentation, contents]) + return names_to_contents + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + with open(FLAGS.spec_file, 'r') as spec_file: + tf_spec = yaml.load(spec_file) + + # Abort if spec.yaml is invalid + if FLAGS.validate: + schema = yaml.load(SCHEMA_TEXT) + v = TfDockerValidator(schema) + if not v.validate(tf_spec): + print('>> ERROR: {} is an invalid spec! The errors are:'.format( + FLAGS.spec_file)) + print(yaml.dump(v.errors, indent=2)) + exit(1) + else: + print('>> WARNING: Not validating {}'.format(FLAGS.spec_file)) + + # Generate mapping of { "cpu-devel": "", ... 
} + names_to_contents = construct_dockerfiles(tf_spec) + + # Write each completed Dockerfile + if not FLAGS.dry_run: + print('>> Emptying destination dir "{}"'.format(FLAGS.output_dir)) + shutil.rmtree(FLAGS.output_dir, ignore_errors=True) + mkdir_p(FLAGS.output_dir) + else: + print('>> Skipping creation of {} (dry run)'.format(FLAGS.output_dir)) + for name, contents in names_to_contents.items(): + path = os.path.join(FLAGS.output_dir, name + '.Dockerfile') + if FLAGS.dry_run: + print('>> Skipping writing contents of {} (dry run)'.format(path)) + print(contents) + else: + mkdir_p(FLAGS.output_dir) + print('>> Writing {}'.format(path)) + with open(path, 'w') as f: + f.write(contents) + + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow/tools/dockerfiles/bashrc b/tensorflow/tools/dockerfiles/bashrc new file mode 100644 index 0000000000..7f54609e78 --- /dev/null +++ b/tensorflow/tools/dockerfiles/bashrc @@ -0,0 +1,33 @@ +export PS1="\[\e[31m\]tf-docker\[\e[m\] \[\e[33m\]\w\[\e[m\] > " +export TERM=xterm-256color +alias grep="grep --color=auto" +alias ls="ls --color=auto" + +echo -e "\e[1;31m" +cat< + Start from Nvidia's Ubuntu base image with CUDA and CuDNN, with TF + development packages. + args: + UBUNTU_VERSION: 16.04 + + python: + desc: Python is required for TensorFlow and other libraries. + args: + USE_PYTHON_3_NOT_2: + default: true + desc: Install python 3 over Python 2 + + tensorflow: + desc: Install the TensorFlow Python package. + args: + TF_PACKAGE: + default: tensorflow + options: + - tensorflow + - tensorflow-gpu + - tf-nightly + - tf-nightly-gpu + desc: The specific TensorFlow Python package to install + shell: + desc: Configure TensorFlow's shell prompt and login tools. + jupyter: + desc: Launch Jupyter on execution instead of a bash prompt. + +# =========== +# DOCKERFILES +# =========== +# Represent dockerfiles. 
+# Spec: +# +# name: the name of the image, referenced from other sections +# desc: A description, inserted later into the Dockerfile +# create-dockerfile: Create a dockerfile based on this. Useful for creating +# base images. Default is true +# partials: List of VALUEs, where a VALUE is either: +# - the name of a partial, which inserts that partial into this file +# - image: [name of another image], which inserts the partials from that +# image into this file +# arg-defaults: List of VALUEs, where a VALUE is either: +# - the name of another image, which loads the default args from that image +# - ARG_NAME: VALUE, which is exactly what you'd expect +images: + + nodev: + create-dockerfile: false + partials: + - python + - tensorflow + - shell + + dev: + create-dockerfile: false + partials: + - python + - bazel + - shell + + cpu: + desc: Ubuntu-based, CPU-only environment for using TensorFlow + partials: + - ubuntu + - image: nodev + + cpu-devel: + desc: > + Ubuntu-based, CPU-only environment for developing changes for + TensorFlow. + partials: + - ubuntu-devel + - image: dev + + nvidia: + desc: Ubuntu-based, Nvidia-GPU-enabled environment for using TensorFlow. + arg-defaults: + - TF_PACKAGE: tensorflow-gpu + partials: + - nvidia + - image: nodev + + nvidia-devel: + desc: > + Ubuntu-based, Nvidia-GPU-enabled environment for developing changes + for TensorFlow. + arg-defaults: + - TF_PACKAGE: tensorflow-gpu + partials: + - nvidia-devel + - image: dev + + cpu-jupyter: + desc: > + Ubuntu-based, CPU-only environment for using TensorFlow, with Jupyter + included. + partials: + - image: cpu + - jupyter + + cpu-devel-jupyter: + desc: > + Ubuntu-based, CPU-only environment for developing changes for + TensorFlow, with Jupyter included. + partials: + - image: cpu-devel + - jupyter + + nvidia-jupyter: + desc: > + Ubuntu-based, Nvidia-GPU-enabled environment for using TensorFlow, with + Jupyter included. 
+ arg-defaults: + - nvidia + partials: + - image: nvidia + - jupyter + + nvidia-devel-jupyter: + desc: > + Ubuntu-based, Nvidia-GPU-enabled environment for developing changes for + TensorFlow, with Jupyter included. + arg-defaults: + - nvidia-devel + partials: + - image: nvidia-devel + - jupyter -- GitLab From 478c4161f2524f9e9a6b78f7de297dc7d194d37a Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Wed, 1 Aug 2018 09:35:31 -0700 Subject: [PATCH 020/380] Code changes based on Rasmus's code review suggestions on PR19403 and enhancing MklInputConversion for MKL-DNN v0.15 integration --- .../core/kernels/mkl_input_conversion_op.cc | 17 +++-- tensorflow/core/kernels/mkl_relu_op.cc | 73 ++++++++++--------- 2 files changed, 48 insertions(+), 42 deletions(-) diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index dc4da33a06..fee6c44cfe 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -296,7 +296,9 @@ class MklInputConversionOp : public OpKernel { // implementation. 
TensorShape tf_shape0 = input_shape_0.GetTfShape(); TensorShape tf_shape1 = input_shape_1.GetTfShape(); - if (tf_shape0 == tf_shape1) { + TensorShape tensor_shape0 = input_tensor_0.shape(); + TensorShape tensor_shape1 = input_tensor_1.shape(); + if (tf_shape0 == tf_shape1 && tensor_shape0 == tensor_shape1) { auto input0_md = input_shape_0.GetMklLayout(); auto input1_md = input_shape_1.GetMklLayout(); @@ -350,7 +352,8 @@ class MklInputConversionOp : public OpKernel { } // Sanity check - bool mkl_shapes_are_same = input_shape_0 == input_shape_1; + bool mkl_shapes_are_same = ((input_shape_0 == input_shape_1) && + (tensor_shape0 == tensor_shape1)); if (mkl_shapes_are_same) { CHECK(false) << "MklInputConversionOp: Unexpected: TF shapes are " "different but MKL shapes are same"; @@ -403,7 +406,8 @@ class MklInputConversionOp : public OpKernel { } // Broadcast is needed if the shapes are not the same - if (mkl_shape->GetTfShape().num_elements() == tf_tensor->shape().num_elements() ) { + if (mkl_shape->GetTfShape().num_elements() + == tf_tensor->shape().num_elements() ) { // Both shapes are same, convert the TF input to MKL VLOG(1) << "MklInputConversionOp: No broadcast needed."; VLOG(1) << "MklInputConversionOp: Converting input " << tf_tensor_index @@ -437,16 +441,17 @@ class MklInputConversionOp : public OpKernel { bool reordered = tf_input.CheckReorderToOpMem( memory::primitive_desc(output_mkl_md, cpu_engine), tensor_out, &net); - if(!reordered) { + + if (!reordered) { // This is the case that the TF tensor has the same shape and format of // mkl tensor. However, tf_tensor can not be simply forwarded to the // output tensor since mkl data tensor is always one dimensional tensor. // Tensor::CopyFrom shares the buffer of the other tensor while set its // shape to the other tensor. 
CHECK(tensor_out->CopyFrom(*tf_tensor, tensor_out->shape())); - } - else + } else { stream(stream::kind::eager).submit(net).wait(); + } // -- The tensor in MKL format passes through -- ForwardMklTensorInToOut(context, mkl_tensor_index, mkl_tensor_index); diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 3d5a05be73..69f2e37b61 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -83,8 +83,9 @@ class MklEltwiseFwdPrimitive : public MklPrimitive { // Eltwise forward execute // src_data: input data buffer of src // dst_data: output data buffer of dst - void Execute(T* src_data, T* dst_data) { - context_.src_mem->set_data_handle(static_cast(src_data)); + void Execute(const T* src_data, T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); context_.dst_mem->set_data_handle(static_cast(dst_data)); context_.fwd_stream->submit(context_.fwd_primitives); @@ -261,10 +262,11 @@ class MklEltwiseBwdPrimitive : public MklPrimitive { // src_data: input data buffer of src // diff_dst_data: input data buffer of diff_dst // diff_src_data: output data buffer of diff_src - - void Execute(T* src_data, T* diff_dst_data, T* diff_src_data) { - context_.src_mem->set_data_handle(static_cast(src_data)); - context_.diff_dst_mem->set_data_handle(static_cast(diff_dst_data)); + void Execute(const T* src_data, const T* diff_dst_data, T* diff_src_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.diff_dst_mem->set_data_handle( + static_cast(const_cast(diff_dst_data))); context_.diff_src_mem->set_data_handle(static_cast(diff_src_data)); context_.bwd_stream->submit(context_.bwd_primitives); @@ -810,17 +812,15 @@ class MklReluOpBase : public OpKernel { MklEltwiseFwdPrimitiveFactory::Get(fwdParams); // prepare for execuation - T* src_data = nullptr; + const T* src_data = src_tensor.flat().data(); // check wehther src need to 
reorder if (src_md.data.format != eltwise_fwd->GetSrcMemoryFormat()) { src.SetUsrMem(src_md, &src_tensor); auto src_target_pd = memory::primitive_desc({{src_dims}, MklDnnType(), eltwise_fwd->GetSrcMemoryFormat()}, cpu_engine); src.CheckReorderToOpMem(src_target_pd); - src_data = static_cast(src.GetOpMem().get_data_handle()); - } else { - src_data = static_cast( - const_cast(src_tensor.flat().data())); + src_data = const_cast( + reinterpret_cast(src.GetOpMem().get_data_handle())); } // allocate dst tensor, always set it as MKL-DNN layout @@ -836,20 +836,20 @@ class MklReluOpBase : public OpKernel { dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(), dnn_shape_src.GetSizesAsMklDnnDims(), dnn_shape_src.GetTfDataFormat()); - tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T)); + tf_shape_dst.AddDim(dst_pd.get_size() / sizeof(T)); } else { - // TODO(yli135): why relu's input is TF tensor in VGG16?? dnn_shape_dst.SetMklTensor(false); tf_shape_dst = src_tensor.shape(); } Tensor* dst_tensor = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {src_index}, dst_index, tf_shape_dst, &dst_tensor)); + {static_cast(src_index)}, + static_cast(dst_index), + tf_shape_dst, &dst_tensor)); AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); - T* dst_data = static_cast(const_cast( - dst_tensor->flat().data())); + T* dst_data = dst_tensor->flat().data(); // execute eltwise eltwise_fwd->Execute(src_data, dst_data); @@ -874,8 +874,8 @@ class MklReluGradOpBase : public OpKernel { public: ~MklReluGradOpBase() {} - explicit MklReluGradOpBase(OpKernelConstruction* context) : - OpKernel(context) { + explicit MklReluGradOpBase(OpKernelConstruction* context) + : OpKernel(context) { } virtual void Compute_Scalar(OpKernelContext* context) = 0; @@ -964,41 +964,43 @@ class MklReluGradOpBase : public OpKernel { auto eltwise_bwd_pd = eltwise_bwd->GetEltwiseBwdPd(); // check whether need reorder for src / diff_dst - T* src_data; - T* diff_dst_data; + const T* 
src_data = src_tensor.flat().data(); if (src_md.data.format != eltwise_bwd->GetSrcMemoryFormat()) { src.SetUsrMem(src_md, &src_tensor); src.CheckReorderToOpMem( eltwise_bwd_pd.get()->diff_src_primitive_desc()); - src_data = static_cast(src.GetOpMem().get_data_handle()); - } else { - src_data = static_cast( - const_cast(src_tensor.flat().data())); + src_data = const_cast( + reinterpret_cast(src.GetOpMem().get_data_handle())); } + const T* diff_dst_data = diff_dst_tensor.flat().data(); if (diff_dst_md.data.format != eltwise_bwd->GetDiffDstMemoryFormat()) { diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); diff_dst.CheckReorderToOpMem( eltwise_bwd_pd.get()->diff_src_primitive_desc()); - diff_dst_data = static_cast( - diff_dst.GetOpMem().get_data_handle()); - } else { - diff_dst_data = static_cast(const_cast( - diff_dst_tensor.flat().data())); + diff_dst_data = const_cast( + reinterpret_cast(diff_dst.GetOpMem().get_data_handle())); } // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { auto diff_src_pd = eltwise_bwd_pd->diff_src_primitive_desc(); dnn_shape_diff_src.SetMklTensor(true); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), - dnn_shape_src.GetSizesAsMklDnnDims(), - dnn_shape_src.GetTfDataFormat()); - tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T)); + if (dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), + dnn_shape_src.GetSizesAsMklDnnDims(), + dnn_shape_src.GetTfDataFormat()); + } else { + dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(), + dnn_shape_diff_dst.GetSizesAsMklDnnDims(), + dnn_shape_diff_dst.GetTfDataFormat()); + } + tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { 
dnn_shape_diff_src.SetMklTensor(false); tf_shape_diff_src = src_tensor.shape(); @@ -1009,8 +1011,7 @@ class MklReluGradOpBase : public OpKernel { &diff_src_tensor)); AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); - T* diff_src_data = static_cast(const_cast( - diff_src_tensor->flat().data())); + T* diff_src_data = diff_src_tensor->flat().data(); // execute eltwise bwd eltwise_bwd->Execute(src_data, diff_dst_data, diff_src_data); -- GitLab From 04fb295a409b426ea44463934c4cec251990bc37 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 1 Aug 2018 15:23:10 -0700 Subject: [PATCH 021/380] Update readme --- tensorflow/tools/dockerfiles/README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md index 1fe51adb4a..4786f8ec81 100644 --- a/tensorflow/tools/dockerfiles/README.md +++ b/tensorflow/tools/dockerfiles/README.md @@ -11,12 +11,33 @@ The Dockerfiles in the `dockerfiles` directory must have their build context set to **the directory with this README.md** to copy in helper files. For example: ```bash -$ docker build -f ./dockerfiles/cpu.Dockerfile -t tf-cpu . +$ docker build -f ./dockerfiles/cpu.Dockerfile -t tf . ``` Each Dockerfile has its own set of available `--build-arg`s which are documented in the Dockerfile itself. 
+## Running + +After building the image with the tag `tf` (for example): + +```bash +# A volume mount is optional but highly recommended, especially for Jupyter + +# CPU-based images +$ docker run -u $(id -u):$(id -g) -v $(PWD):/my-devel -it tf + +# GPU-based images (set up nvidia-docker2 first) +$ docker run --runtime=nvidia -u $(id -u):$(id -g) -v $(PWD):/my-devel -it tf + +# Images with Jupyter run on port 8888, and needs a volume for notebooks +$ docker run --user $(id -u):$(id -g) -p 8888:8888 -v $(PWD):/notebooks -it tf + +# Development images +$ docker run --user $(id -u):$(id -g) -it tf +docker$ git clone https://github.com/tensorflow/tensorflow +``` + ## Maintaining To make changes to TensorFlow's Dockerfiles, you'll update `spec.yml` and the -- GitLab From 00869fc36a952418ffa75fd4fd5763b993251dd2 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 1 Aug 2018 15:32:36 -0700 Subject: [PATCH 022/380] Clean up some documentation --- tensorflow/tools/dockerfiles/assembler.py | 3 +- tensorflow/tools/dockerfiles/spec.yml | 43 ++++++++++++----------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py index a33c42ace6..8e0e5923d6 100644 --- a/tensorflow/tools/dockerfiles/assembler.py +++ b/tensorflow/tools/dockerfiles/assembler.py @@ -1,6 +1,7 @@ """Assemble common TF Dockerfiles from many parts. -TODO(angerson): DO NOT SUBMIT without a detailed description of assembler. +This script constructs TF's Dockerfiles by aggregating partial +Dockerfiles. See README.md for usage examples. """ from __future__ import absolute_import diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index 48a0cb772e..4d622c53d2 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -1,6 +1,7 @@ # ====== # HEADER # ====== +# # This is commented-out and prepended to each generated Dockerfile. 
header: | THIS IS A GENERATED DOCKERFILE. @@ -12,19 +13,20 @@ header: | # ======== # PARTIALS # ======== -# Represent and document pieces of a Dockerfile. -# Spec: +# +# Represent and document pieces of a Dockerfile. Spec: # -# name: the name of the partial, referenced from other sections +# name: the name of the partial, is referenced from the images section # desc: A description, inserted later into the Dockerfile -# file: Alternative file prefix, e.g. file.partial.Dockerfile (default = name) +# file: Alternative file prefix, e.g. file.partial.Dockerfile. The default is +# the name of the partial. # args: A dict of ARGs in the Dockerfile; each entry has the format -# ARG_NAME: VALUE where VALUE is -# - a concrete value: becomes the default +# ARG_NAME: VALUE where VALUE is one of: # - a dict: -# desc: Arg description -# default: Default value for the arg; is written to the Dockerfile -# options: List of strings, part of documentation +# desc: Documentation for the arg +# default: Default value for the arg; is written to the Dockerfile +# options: List of strings, part of documentation +# - a concrete value: the same as a dictionary with default: [value]. partials: ubuntu: @@ -75,23 +77,24 @@ partials: jupyter: desc: Launch Jupyter on execution instead of a bash prompt. -# =========== -# DOCKERFILES -# =========== -# Represent dockerfiles. -# Spec: +# ====== +# IMAGES +# ====== +# +# Represent Dockerfiles. Spec: # -# name: the name of the image, referenced from other sections +# name: the name of the image, possibly referenced by other images # desc: A description, inserted later into the Dockerfile # create-dockerfile: Create a dockerfile based on this. Useful for creating -# base images. Default is true +# extensible base images that don't need a file. Default is true. 
# partials: List of VALUEs, where a VALUE is either: -# - the name of a partial, which inserts that partial into this file +# - the name of a partial, which inserts that partial into this image # - image: [name of another image], which inserts the partials from that -# image into this file +# image into this image # arg-defaults: List of VALUEs, where a VALUE is either: -# - the name of another image, which loads the default args from that image -# - ARG_NAME: VALUE, which is exactly what you'd expect +# - ARG_NAME: VALUE, which sets the ARG_NAME to VALUE wherever it appears +# in this image's partials +# - [name of another image], which loads the default args from that image images: nodev: -- GitLab From ffc12e18fbf7acbaf67a11f1470dc54786e2cc17 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 7 Aug 2018 14:53:26 -0700 Subject: [PATCH 023/380] rebase mkl_util.h with master branch --- tensorflow/core/util/mkl_util.h | 107 ++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 41 deletions(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 21868fa2c0..a66b1215bd 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -17,9 +17,10 @@ limitations under the License. #define TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ #ifdef INTEL_MKL -#include +#include #include #include +#include #ifdef INTEL_MKL_ML #include "mkl_dnn.h" @@ -34,11 +35,11 @@ limitations under the License. 
#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" - #ifndef INTEL_MKL_ML #include "mkldnn.hpp" #include "tensorflow/core/lib/core/stringpiece.h" @@ -1503,7 +1504,8 @@ class MklDnnData { /// Operations memory descriptor memory::desc* op_md_; - + /// Operations temp buffer + void* allocated_buffer_; /// CPU engine on which operation will be executed const engine* cpu_engine_; @@ -1512,6 +1514,7 @@ class MklDnnData { : user_memory_(nullptr), reorder_memory_(nullptr), op_md_(nullptr), + allocated_buffer_(nullptr), cpu_engine_(e) {} ~MklDnnData() { @@ -1652,6 +1655,14 @@ class MklDnnData { user_memory_->set_data_handle(GetTensorBuffer(tensor)); } + /// allocate function for data buffer + inline void AllocateBuffer(size_t size) { + const int64 kMemoryAlginment = 64; // For AVX512 memory alignment. + allocated_buffer_ = cpu_allocator()->AllocateRaw(kMemoryAlginment, size); + } + + inline void* GetAllocatedBuffer() { return allocated_buffer_; } + /// Get the memory primitive for input and output of an op. If inputs /// to an op require reorders, then this function returns memory primitive /// for reorder. Otherwise, it will return memory primitive for user memory. @@ -1873,7 +1884,6 @@ class MklDnnData { net.push_back(FindOrCreateReorder(reorder_memory_, user_memory_)); stream(stream::kind::eager).submit(net).wait(); } - }; /// Base class for operations with reuse of primitives @@ -1882,9 +1892,8 @@ class MklPrimitive { public: virtual ~MklPrimitive() {} - // Dummy data. Its size, hard-coded as 256 here, does - // not matter since MKL should never operate on this buffer. 
- unsigned char DummyData[256]; + // Dummy data which MKL DNN never operates on + unsigned char* DummyData = nullptr; }; const mkldnn::memory::dims NONE_DIMS = {}; @@ -1896,8 +1905,9 @@ class MklPrimitiveFactory { ~MklPrimitiveFactory() {} MklPrimitive* GetOp(const string& key) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { + auto& map = MklPrimitiveFactory::GetHashMap(); + auto stream_iter = map.find(key); + if (stream_iter == map.end()) { return nullptr; } else { CHECK(stream_iter->second != nullptr) << "nullptr present in map"; @@ -1906,7 +1916,8 @@ class MklPrimitiveFactory { } void SetOp(const string& key, MklPrimitive* op) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + auto& map = MklPrimitiveFactory::GetHashMap(); + auto stream_iter = map.find(key); CHECK(stream_iter == map.end()); @@ -1955,11 +1966,25 @@ class FactoryKeyCreator { } }; +static inline memory::format get_desired_format(int channel) { + memory::format fmt_desired = memory::format::any; + + if (port::TestCPUFeature(port::CPUFeature::AVX512F) && (channel % 16) == 0) { + fmt_desired = memory::format::nChw16c; + } else if (port::TestCPUFeature(port::CPUFeature::AVX2) && + (channel % 8) == 0) { + fmt_desired = memory::format::nChw8c; + } else { + fmt_desired = memory::format::nchw; + } + return fmt_desired; +} + class MklReorderPrimitive : public MklPrimitive { - public: - explicit MklReorderPrimitive(const memory* from, const memory* to) { - Setup(from, to); - } + public: + explicit MklReorderPrimitive(const memory* from, const memory* to) { + Setup(from, to); + } ~MklReorderPrimitive() {} std::shared_ptr GetPrimitive() { @@ -1971,7 +1996,7 @@ class MklReorderPrimitive : public MklPrimitive { context_.dst_mem->set_data_handle(to->get_data_handle()); } - private: + private: struct ReorderContext { std::shared_ptr src_mem; std::shared_ptr dst_mem; @@ -1995,28 +2020,27 @@ class 
MklReorderPrimitive : public MklPrimitive { template class MklReorderPrimitiveFactory : public MklPrimitiveFactory { - public: - static MklReorderPrimitive* Get(const memory* from, - const memory* to) { - auto reorderPrim = static_cast( + public: + static MklReorderPrimitive* Get(const memory* from, const memory* to) { + auto reorderPrim = static_cast( MklReorderPrimitiveFactory::GetInstance().GetReorder(from, to)); - if (reorderPrim == nullptr) { - reorderPrim = new MklReorderPrimitive(from, to); - MklReorderPrimitiveFactory::GetInstance().SetReorder( - from, to, reorderPrim); - } - reorderPrim->SetMemory(from, to); - return reorderPrim; + if (reorderPrim == nullptr) { + reorderPrim = new MklReorderPrimitive(from, to); + MklReorderPrimitiveFactory::GetInstance().SetReorder(from, to, + reorderPrim); } + reorderPrim->SetMemory(from, to); + return reorderPrim; + } static MklReorderPrimitiveFactory & GetInstance() { static MklReorderPrimitiveFactory instance_; return instance_; } - private: - MklReorderPrimitiveFactory() {}; - ~MklReorderPrimitiveFactory() {}; + private: + MklReorderPrimitiveFactory() {} + ~MklReorderPrimitiveFactory() {} static string CreateKey(const memory* from, const memory* to) { string prefix = "reorder"; @@ -2046,18 +2070,19 @@ class MklReorderPrimitiveFactory : public MklPrimitiveFactory { } }; - /// Fuction to find(or create) a reorder from memory pointed by from to memory pointed - /// by to, it will created primitive or get primitive from pool if it is cached. - /// Returns the primitive. - template - inline primitive FindOrCreateReorder(const memory* from, const memory* to) { - CHECK_NOTNULL(from); - CHECK_NOTNULL(to); - MklReorderPrimitive *reorder_prim = - MklReorderPrimitiveFactory::Get(from, to); - return *reorder_prim->GetPrimitive(); - } - +/// Fuction to find(or create) a reorder from memory pointed by +/// from to memory pointed by to, it will created primitive or +/// get primitive from pool if it is cached. 
+/// Returns the primitive. +template +inline primitive FindOrCreateReorder(const memory* from, const memory* to) { + CHECK_NOTNULL(from); + CHECK_NOTNULL(to); + MklReorderPrimitive* reorder_prim = + MklReorderPrimitiveFactory::Get(from, to); + return *reorder_prim->GetPrimitive(); +} + #endif // INTEL_MKL_DNN } // namespace tensorflow -- GitLab From 10ca9a8fb215e66d25a8469c5eeb5b8d6c02e05e Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Tue, 7 Aug 2018 15:29:24 -0700 Subject: [PATCH 024/380] RNN.call should get initial state from full input spec --- tensorflow/python/keras/layers/recurrent.py | 8 ++++++ tensorflow/python/keras/layers/wrappers.py | 26 +++++++++++++++---- .../keras/utils/multi_gpu_utils_test.py | 17 ++++++++++++ 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py index a8bfdf25f2..85d0a70203 100644 --- a/tensorflow/python/keras/layers/recurrent.py +++ b/tensorflow/python/keras/layers/recurrent.py @@ -583,6 +583,14 @@ class RNN(Layer): # note that the .build() method of subclasses MUST define # self.input_spec and self.state_spec with complete input shapes. if isinstance(inputs, list): + # get initial_state from full input spec + # as they could be copied to multiple GPU. 
+ if self._num_constants is None: + initial_state = inputs[1:] + else: + initial_state = inputs[1:-self._num_constants] + if len(initial_state) == 0: + initial_state = None inputs = inputs[0] if initial_state is not None: pass diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py index f0c1e76156..cf2e139fad 100644 --- a/tensorflow/python/keras/layers/wrappers.py +++ b/tensorflow/python/keras/layers/wrappers.py @@ -545,11 +545,27 @@ class Bidirectional(Wrapper): if initial_state is not None and generic_utils.has_arg( self.layer.call, 'initial_state'): - forward_state = initial_state[:len(initial_state) // 2] - backward_state = initial_state[len(initial_state) // 2:] - y = self.forward_layer.call(inputs, initial_state=forward_state, **kwargs) - y_rev = self.backward_layer.call( - inputs, initial_state=backward_state, **kwargs) + forward_inputs = [inputs[0]] + backward_inputs = [inputs[0]] + pivot = len(initial_state) // 2 + 1 + # add forward initial state + forward_state = inputs[1:pivot] + forward_inputs += forward_state + if self._num_constants is None: + # add backward initial state + backward_state = inputs[pivot:] + backward_inputs += backward_state + else: + # add backward initial state + backward_state = inputs[pivot:-self._num_constants] + backward_inputs += backward_state + # add constants for forward and backward layers + forward_inputs += inputs[-self._num_constants:] + backward_inputs += inputs[-self._num_constants:] + y = self.forward_layer.call(forward_inputs, + initial_state=forward_state, **kwargs) + y_rev = self.backward_layer.call(backward_inputs, + initial_state=backward_state, **kwargs) else: y = self.forward_layer.call(inputs, **kwargs) y_rev = self.backward_layer.call(inputs, **kwargs) diff --git a/tensorflow/python/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/utils/multi_gpu_utils_test.py index 77792d14f5..c7e94998b4 100644 --- 
a/tensorflow/python/keras/utils/multi_gpu_utils_test.py +++ b/tensorflow/python/keras/utils/multi_gpu_utils_test.py @@ -180,6 +180,23 @@ class TestMultiGPUModel(test.TestCase): target_tensors=[targets]) parallel_model.fit(epochs=1, steps_per_epoch=3) + def test_multi_gpu_with_multi_input_layers(self): + gpus = 2 + + if not check_if_compatible_devices(gpus=gpus): + return + + with self.test_session(): + inputs = keras.Input((4, 3)) + init_state = keras.Input((3,)) + outputs = keras.layers.SimpleRNN( + 3, return_sequences=True)(inputs, initial_state=init_state) + x = [np.random.randn(2, 4, 3), np.random.randn(2, 3)] + y = np.random.randn(2, 4, 3) + model = keras.Model([inputs, init_state], outputs) + parallel_model = keras.utils.multi_gpu_model(model, gpus=gpus) + parallel_model.compile(loss='mean_squared_error', optimizer='adam') + parallel_model.train_on_batch(x, y) if __name__ == '__main__': test.main() -- GitLab From b2470ca8a93a7a4bd960ba7dff65be74779c4f62 Mon Sep 17 00:00:00 2001 From: Jonathan Date: Thu, 26 Jul 2018 23:03:33 +0800 Subject: [PATCH 025/380] modify _TopKGrad so that all operations can run on GPU for better performance --- tensorflow/python/ops/nn_grad.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index df23ac55ce..025ce7ce88 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -975,25 +975,31 @@ def _TopKGrad(op, grad, _): in_shape = array_ops.shape(op.inputs[0]) ind_shape = array_ops.shape(op.outputs[1]) - ind_lastdim = array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1) + # int32 is not supported on GPU hence up-casting + ind_lastdim = array_ops.gather(math_ops.cast( + ind_shape, dtypes.int64), array_ops.size(ind_shape) - 1) # Flatten indices to 2D. 
ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack([-1, ind_lastdim])) - in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1) + in_lastdim = array_ops.gather(math_ops.cast( + in_shape, dtypes.int64), array_ops.size(in_shape) - 1) outerdim = array_ops.shape(ind_2d)[0] # Compute linear indices (flattened to 1D). - ind = array_ops.reshape(ind_2d + array_ops.expand_dims( - math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1), [-1]) + ind = array_ops.reshape(ind_2d + math_ops.cast(array_ops.expand_dims( + math_ops.range(0, math_ops.cast(outerdim, dtypes.int64) + * in_lastdim, in_lastdim), -1 + ), dtypes.int32), [-1]) # Substitute grad to appropriate locations and fill the rest with zeros, # finally reshaping it to the original input shape. return [ array_ops.reshape( - sparse_ops.sparse_to_dense( - ind, - array_ops.reshape(math_ops.reduce_prod(in_shape), [1]), + array_ops.scatter_nd( + array_ops.expand_dims(ind, -1), array_ops.reshape(grad, [-1]), - validate_indices=False), in_shape), + [math_ops.reduce_prod(in_shape)] + ), + in_shape), array_ops.zeros([], dtype=dtypes.int32) ] -- GitLab From dbfa330c963f9e015cc66ad4aebdd7985651c024 Mon Sep 17 00:00:00 2001 From: drpngx Date: Fri, 10 Aug 2018 13:19:23 -0700 Subject: [PATCH 026/380] Address ebrevdo --- tensorflow/python/ops/nn_grad.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 025ce7ce88..59ba0091c8 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -987,8 +987,7 @@ def _TopKGrad(op, grad, _): # Compute linear indices (flattened to 1D). 
ind = array_ops.reshape(ind_2d + math_ops.cast(array_ops.expand_dims( math_ops.range(0, math_ops.cast(outerdim, dtypes.int64) - * in_lastdim, in_lastdim), -1 - ), dtypes.int32), [-1]) + * in_lastdim, in_lastdim), -1), dtypes.int32), [-1]) # Substitute grad to appropriate locations and fill the rest with zeros, # finally reshaping it to the original input shape. -- GitLab From 026004e05dc172d1639840055462013f95e56bbe Mon Sep 17 00:00:00 2001 From: Seb Bro Date: Sat, 11 Aug 2018 22:04:08 +0200 Subject: [PATCH 027/380] Fix latex text (docs). Escape special character and add text section for comments. --- tensorflow/python/training/adam.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py index bcbe5907d6..86b854c0de 100644 --- a/tensorflow/python/training/adam.py +++ b/tensorflow/python/training/adam.py @@ -43,15 +43,15 @@ class AdamOptimizer(optimizer.Optimizer): Initialization: - $$m_0 := 0 (Initialize initial 1st moment vector)$$ - $$v_0 := 0 (Initialize initial 2nd moment vector)$$ - $$t := 0 (Initialize timestep)$$ + $$m_0 := 0 \text{(Initialize initial 1st moment vector)}$$ + $$v_0 := 0 \text{(Initialize initial 2nd moment vector)}$$ + $$t := 0 \text{(Initialize timestep)}$$ The update rule for `variable` with gradient `g` uses an optimization described at the end of section2 of the paper: $$t := t + 1$$ - $$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$ + $$lr_t := \text{learning\_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$ $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ -- GitLab From 62049835ce3064c191a8054ec1056b4701afb744 Mon Sep 17 00:00:00 2001 From: Seb Bro Date: Sat, 11 Aug 2018 22:19:44 +0200 Subject: [PATCH 028/380] Fix sqrt in lr formula. 
--- tensorflow/python/training/adam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py index 86b854c0de..704ad6d3fe 100644 --- a/tensorflow/python/training/adam.py +++ b/tensorflow/python/training/adam.py @@ -51,7 +51,7 @@ class AdamOptimizer(optimizer.Optimizer): described at the end of section2 of the paper: $$t := t + 1$$ - $$lr_t := \text{learning\_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$ + $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ -- GitLab From a4667873d90cf2c8530e8a8058e7d1c065639ce8 Mon Sep 17 00:00:00 2001 From: Seb Bro Date: Sat, 11 Aug 2018 22:23:21 +0200 Subject: [PATCH 029/380] Fix formula. --- tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt index b90f5473c8..6341eeda32 100644 --- a/tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt @@ -82,7 +82,7 @@ END } summary: "Update \'*var\' according to the Adam algorithm." description: < Date: Sat, 11 Aug 2018 22:23:52 +0200 Subject: [PATCH 030/380] Fix formula. --- .../core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt index ad0aeac004..2dcd136ae3 100644 --- a/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt @@ -76,7 +76,7 @@ END } summary: "Update \'*var\' according to the Adam algorithm." 
description: < Date: Sat, 11 Aug 2018 22:26:40 +0200 Subject: [PATCH 031/380] Fix formula and text rendering. --- tensorflow/contrib/optimizer_v2/adam.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/optimizer_v2/adam.py b/tensorflow/contrib/optimizer_v2/adam.py index 631d4f44df..04b1552b61 100644 --- a/tensorflow/contrib/optimizer_v2/adam.py +++ b/tensorflow/contrib/optimizer_v2/adam.py @@ -40,15 +40,14 @@ class AdamOptimizer(optimizer_v2.OptimizerV2): Initialization: - $$m_0 := 0 (Initialize initial 1st moment vector)$$ - $$v_0 := 0 (Initialize initial 2nd moment vector)$$ - $$t := 0 (Initialize timestep)$$ - + $$m_0 := 0 \text{(Initialize initial 1st moment vector)}$$ + $$v_0 := 0 \text{(Initialize initial 2nd moment vector)}$$ + $$t := 0 \text{(Initialize timestep)}$$ The update rule for `variable` with gradient `g` uses an optimization described at the end of section2 of the paper: $$t := t + 1$$ - $$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$ + $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ -- GitLab From 135ac89cae38464a9c6ea21af244e4a1bda255ed Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 13 Aug 2018 15:52:43 -0700 Subject: [PATCH 032/380] enable pooling3D op --- tensorflow/core/graph/mkl_layout_pass.cc | 28 +++- .../core/graph/mkl_tfconversion_pass.cc | 12 +- tensorflow/core/kernels/mkl_aggregate_ops.cc | 20 ++- tensorflow/core/kernels/mkl_avgpooling_op.cc | 51 ++++--- tensorflow/core/kernels/mkl_maxpooling_op.cc | 59 +++++--- .../core/kernels/mkl_pooling_ops_common.cc | 129 ++++++++++++----- .../core/kernels/mkl_pooling_ops_common.h | 132 +++++++++++++----- tensorflow/core/ops/nn_ops.cc | 98 +++++++++++++ tensorflow/core/util/mkl_util.h | 114 +++++++++++++-- 9 files changed, 519 insertions(+), 124 deletions(-) diff --git 
a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 5683944e46..30e48d3860 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -334,6 +334,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.conv2d_grad_input, mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input), CopyAttrsConv2D, AlwaysRewrite, nullptr}); + rinfo_.push_back({csinfo_.fused_batch_norm, mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm), CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr}); @@ -546,14 +547,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // If Op has been specifically assigned to a non-CPU device, then No. if (!n->assigned_device_name().empty() && - !str_util::StrContains(n->assigned_device_name(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) { result = false; reason = "Op has been assigned a runtime device that is not CPU."; } // If user has specifically assigned this op to a non-CPU device, then No. 
if (!n->def().device().empty() && - !str_util::StrContains(n->def().device(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) { result = false; reason = "User has assigned a device that is not CPU."; } @@ -2408,6 +2409,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.addn = "AddN"; csinfo_.avg_pool = "AvgPool"; csinfo_.avg_pool_grad = "AvgPoolGrad"; + csinfo_.avg_pool3d = "AvgPool3D"; + csinfo_.avg_pool3d_grad = "AvgPool3DGrad"; csinfo_.bias_add = "BiasAdd"; csinfo_.bias_add_grad = "BiasAddGrad"; csinfo_.concat = "Concat"; @@ -2426,6 +2429,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.matmul = "MatMul"; csinfo_.max_pool = "MaxPool"; csinfo_.max_pool_grad = "MaxPoolGrad"; + csinfo_.max_pool3d = "MaxPool3D"; + csinfo_.max_pool3d_grad = "MaxPool3DGrad"; csinfo_.mkl_conv2d = "_MklConv2D"; csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput"; csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter"; @@ -2460,6 +2465,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.avg_pool_grad, mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad), CopyAttrsPooling, AlwaysRewrite}); + rinfo_.push_back({csinfo_.avg_pool3d, + mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d), + CopyAttrsPooling, AlwaysRewrite}); + rinfo_.push_back({csinfo_.avg_pool3d_grad, + mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d_grad), + CopyAttrsPooling, AlwaysRewrite}); rinfo_.push_back({csinfo_.concat, mkl_op_registry::GetMklOpName(csinfo_.concat), CopyAttrsConcat, AlwaysRewrite}); @@ -2501,7 +2512,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.max_pool_grad, mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad), CopyAttrsPooling, MaxpoolGradRewrite}); - + rinfo_.push_back({csinfo_.max_pool3d, + mkl_op_registry::GetMklOpName(csinfo_.max_pool3d), + CopyAttrsPooling, NonDepthBatchWisePoolRewrite}); + 
rinfo_.push_back({csinfo_.max_pool3d_grad, + mkl_op_registry::GetMklOpName(csinfo_.max_pool3d_grad), + CopyAttrsPooling, AlwaysRewrite}); rinfo_.push_back({csinfo_.maximum, mkl_op_registry::GetMklOpName(csinfo_.maximum), CopyAttrsDataType, AlwaysRewrite}); @@ -2538,6 +2554,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // Add info about which ops to add workspace edge to and the slots. wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3}); wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3}); + wsinfo_.push_back + ({csinfo_.max_pool3d, csinfo_.max_pool3d_grad, 0, 1, 1, 3}); // Add a rule for merging nodes minfo_.push_back({csinfo_.conv2d, csinfo_.bias_add, @@ -2605,6 +2623,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string add; string avg_pool; string avg_pool_grad; + string avg_pool3d; + string avg_pool3d_grad; string bias_add; string bias_add_grad; string concat; @@ -2622,6 +2642,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string matmul; string max_pool; string max_pool_grad; + string max_pool3d; + string max_pool3d_grad; string maximum; string mkl_conv2d; string mkl_conv2d_grad_input; diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc index aa39af637f..b67a321fc1 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc @@ -175,7 +175,11 @@ Status MklToTfConversionPass::InsertConversionNodeOnEdge( .Finalize(&**g, &conversion_node)); CHECK_NOTNULL(conversion_node); - if (GetNodeAttr(src->def(), "data_format", &data_format) == Status::OK()) { + // TODO(Intel-tf) MklToTf accepts only NHWC or NCHW, but doesn't seem to be + // using data_format. This code might be redundant. 
+ if (GetNodeAttr(src->def(), "data_format", &data_format) == Status::OK() && + (data_format == ToString(FORMAT_NHWC) || + data_format == ToString(FORMAT_NCHW))) { conversion_node->AddAttr("data_format", data_format); } @@ -254,9 +258,13 @@ Status MklToTfConversionPass::InsertInputConversionNode( } } + // TODO(Intel-tf) MklInputConversion accepts only NHWC or NCHW, but doesn't + // seem to be using data_format. This code might be redundant. string data_format; if (GetNodeAttr(edges[0]->src()->def(), "data_format", &data_format) == - Status::OK()) { + Status::OK() && + (data_format == ToString(FORMAT_NHWC) || + data_format == ToString(FORMAT_NCHW))) { conversion_node->AddAttr("data_format", data_format); } diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc index 28edf51546..20aa1f7ea1 100644 --- a/tensorflow/core/kernels/mkl_aggregate_ops.cc +++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc @@ -392,16 +392,28 @@ class MklAddNOp : public OpKernel { memory::format src1_mkl_data_format = src1_mkl_shape.GetTfDataFormat(); auto src1_tf_data_format = MklDnnDataFormatToTFDataFormat(src1_mkl_data_format); - auto src2_dims = - TFShapeToMklDnnDimsInNCHW(src2_tensor.shape(), src1_tf_data_format); + memory::dims src2_dims; + if (src2_tensor.dims() == 4) { + src2_dims = TFShapeToMklDnnDimsInNCHW(src2_tensor.shape(), + src1_tf_data_format); + } else { + src2_dims = TFShapeToMklDnnDimsInNCDHW(src2_tensor.shape(), + src1_tf_data_format); + } md2 = memory::desc(src2_dims, MklDnnType(), src1_mkl_data_format); } else if (input2_in_mkl_format && !input1_in_mkl_format) { // Same comment as above. 
memory::format src2_mkl_data_format = src2_mkl_shape.GetTfDataFormat(); auto src2_tf_data_format = MklDnnDataFormatToTFDataFormat(src2_mkl_data_format); - auto src1_dims = - TFShapeToMklDnnDimsInNCHW(src1_tensor.shape(), src2_tf_data_format); + memory::dims src1_dims; + if (src1_tensor.dims() == 4) { + src1_dims = TFShapeToMklDnnDimsInNCHW(src1_tensor.shape(), + src2_tf_data_format); + } else { + src1_dims = TFShapeToMklDnnDimsInNCDHW(src1_tensor.shape(), + src2_tf_data_format); + } md1 = memory::desc(src1_dims, MklDnnType(), src2_mkl_data_format); md2 = src2_mkl_shape.GetMklLayout(); diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc index 969baecc51..749b2a1838 100644 --- a/tensorflow/core/kernels/mkl_avgpooling_op.cc +++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc @@ -453,6 +453,8 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase { // initialize variables for the pooling op MklPoolParameters pool_params; + // check whether pooling is 2D or 3D + bool isPool2D = (this->ksize_.size() == 4); // Get the input tensor and initialize the pooling parameters TensorShape input_tensor_shape = input_tensor.shape(); this->InitMklPoolParameters(context, &pool_params, dnn_shape_input, @@ -473,23 +475,22 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase { } memory::dims filter_dims, strides, padding_left, padding_right; + // Get src/filter/stride/padding information this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right); + &padding_left, &padding_right, isPool2D); // Get the input memory descriptor - memory::desc input_md = - dnn_shape_input.IsMklTensor() - ? dnn_shape_input.GetMklLayout() - : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, - this->data_format_tf_), - MklDnnType(), this->data_format_mkldnn_); - - // Get src/filter/stride/padding information memory::dims src_dims = dnn_shape_input.IsMklTensor() ? 
dnn_shape_input.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), - this->data_format_tf_); + : isPool2D ? TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(input_tensor.shape(), + this->data_format_tf_); + memory::desc input_md = dnn_shape_input.IsMklTensor() + ? dnn_shape_input.GetMklLayout() + : memory::desc(src_dims, MklDnnType(), + this->data_format_mkldnn_); // Get an average pooling primitive from the op pool MklPoolingFwdPrimitive* pooling_fwd = nullptr; @@ -562,24 +563,30 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { for (int i = 0; i < orig_input_tensor.NumElements(); i++) { orig_input_shape.AddDim(shape_vec(i)); } + + bool isPool2D = (this->ksize_.size() == 4); this->InitMklPoolParameters(context, &pool_params, orig_input_mkl_shape, orig_input_shape); memory::dims filter_dims, strides, padding_left, padding_right; this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right); + &padding_left, &padding_right, isPool2D); memory::dims orig_input_dims_mkl_order = orig_input_mkl_shape.IsMklTensor() ? orig_input_mkl_shape.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(orig_input_shape, - this->data_format_tf_); + : isPool2D ? TFShapeToMklDnnDimsInNCHW(orig_input_shape, + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(orig_input_shape, + this->data_format_tf_); memory::dims diff_dst_dims = grad_mkl_shape.IsMklTensor() ? grad_mkl_shape.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), - this->data_format_tf_); + : isPool2D ? 
TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(grad_tensor.shape(), + this->data_format_tf_); memory::dims output_dims_mkl_order; this->GetOutputDims(pool_params, &output_dims_mkl_order); @@ -664,6 +671,18 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { } }; // MklAvgPoolingGradOp +REGISTER_KERNEL_BUILDER(Name("_MklAvgPool3D") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .Label(mkl_op_registry::kMklOpLabel), + MklAvgPoolingOp); + +REGISTER_KERNEL_BUILDER(Name("_MklAvgPool3DGrad") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .Label(mkl_op_registry::kMklOpLabel), + MklAvgPoolingGradOp); + #endif // INTEL_MKL_ML_ONLY REGISTER_KERNEL_BUILDER(Name("_MklAvgPool") diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc index e149f003e5..aa7c0d9b7f 100644 --- a/tensorflow/core/kernels/mkl_maxpooling_op.cc +++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc @@ -524,6 +524,8 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase { // initialize variables for the pooling op MklPoolParameters pool_params; + // check whether pooling is 2D or 3D + bool isPool2D = (this->ksize_.size() == 4); // Get the input tensor and initialize the pooling parameters TensorShape input_tensor_shape = input_tensor.shape(); this->InitMklPoolParameters(context, &pool_params, dnn_shape_input, @@ -547,20 +549,26 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase { memory::desc input_md = dnn_shape_input.IsMklTensor() ? dnn_shape_input.GetMklLayout() - : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, - this->data_format_tf_), - MklDnnType(), this->data_format_mkldnn_); + : isPool2D ? 
memory::desc( + TFShapeToMklDnnDimsInNCHW(input_tensor_shape, + this->data_format_tf_), + MklDnnType(), this->data_format_mkldnn_) + : memory::desc( + TFShapeToMklDnnDimsInNCDHW( + input_tensor_shape, this->data_format_tf_), + MklDnnType(), this->data_format_mkldnn_); // Get src/filter/stride/padding information memory::dims src_dims = dnn_shape_input.IsMklTensor() ? dnn_shape_input.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), - this->data_format_tf_); - + : isPool2D ? TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(input_tensor.shape(), + this->data_format_tf_); memory::dims filter_dims, strides, padding_left, padding_right; this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right); + &padding_left, &padding_right, isPool2D); // Get a pooling op from the cached pool MklPoolingFwdPrimitive* pooling_fwd = nullptr; @@ -663,23 +671,30 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { MklPoolParameters pool_params; TensorShape orig_input_shape = orig_input_tensor.shape(); + + bool isPool2D = (this->ksize_.size() == 4); this->InitMklPoolParameters(context, &pool_params, orig_input_mkl_shape, orig_input_shape); memory::dims filter_dims, strides, padding_left, padding_right; this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right); + &padding_left, &padding_right, isPool2D); - memory::dims diff_dst_dims = - grad_mkl_shape.IsMklTensor() - ? grad_mkl_shape.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), - this->data_format_tf_); memory::dims orig_input_dims_mkl_order = orig_input_mkl_shape.IsMklTensor() ? orig_input_mkl_shape.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(orig_input_shape, - this->data_format_tf_); + : isPool2D ? 
TFShapeToMklDnnDimsInNCHW(orig_input_shape, + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(orig_input_shape, + this->data_format_tf_); + + memory::dims diff_dst_dims = + grad_mkl_shape.IsMklTensor() + ? grad_mkl_shape.GetSizesAsMklDnnDims() + : isPool2D ? TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(grad_tensor.shape(), + this->data_format_tf_); memory::dims output_dims_mkl_order; this->GetOutputDims(pool_params, &output_dims_mkl_order); @@ -715,7 +730,7 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { void* ws_data = static_cast( const_cast(workspace_tensor.flat().data())); - ; + auto ws_md = pooling_bwd->GetPoolingFwdPd()->workspace_primitive_desc().desc(); if (ws_md.data.format != pooling_bwd->GetWorkspaceFormat()) { @@ -817,6 +832,18 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { } }; // MklMaxPoolingGradOp +REGISTER_KERNEL_BUILDER(Name("_MklMaxPool3D") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .Label(mkl_op_registry::kMklOpLabel), + MklMaxPoolingOp); + +REGISTER_KERNEL_BUILDER(Name("_MklMaxPool3DGrad") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .Label(mkl_op_registry::kMklOpLabel), + MklMaxPoolingGradOp); + #endif // INTEL_MKL_ML_ONLY REGISTER_KERNEL_BUILDER(Name("_MklMaxPool") diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc index d7ad3f9dcd..5d02ceea12 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc @@ -24,7 +24,7 @@ limitations under the License. namespace tensorflow { -#ifndef INTEL_MKL_ML +#ifndef INTEL_MKL_ML_ONLY using mkldnn::pooling_avg; using mkldnn::pooling_avg_exclude_padding; @@ -46,9 +46,10 @@ void MklPoolingFwdPrimitive::Setup(const MklPoolingParams& fwdParams) { // so src format is currently hard-coded. 
// A utility function is used to do this, // which may be broken with future CPU architectures + bool is_2d = (fwdParams.src_dims.size() == 4); context_.src_md.reset( new memory::desc({fwdParams.src_dims}, MklDnnType(), - get_desired_format(fwdParams.src_dims[1]))); + get_desired_format(fwdParams.src_dims[1], is_2d))); context_.dst_md.reset(new memory::desc({fwdParams.dst_dims}, MklDnnType(), memory::format::any)); @@ -61,7 +62,7 @@ void MklPoolingFwdPrimitive::Setup(const MklPoolingParams& fwdParams) { new pooling_forward::primitive_desc(*context_.fwd_desc, cpu_engine_)); // store expected primitive format - context_.src_fmt = get_desired_format(fwdParams.src_dims[1]); + context_.src_fmt = get_desired_format(fwdParams.src_dims[1], is_2d); context_.dst_fmt = static_cast( context_.fwd_pd.get()->dst_primitive_desc().desc().data.format); @@ -126,12 +127,14 @@ void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { } context_.alg_kind = bwdParams.alg_kind; + // check whether it is 2d or 3d + bool is_2d = (bwdParams.dst_dims.size() == 4); // Create memory desc context_.diff_src_md.reset(new memory::desc( {bwdParams.src_dims}, MklDnnType(), memory::format::any)); context_.diff_dst_md.reset( new memory::desc({bwdParams.dst_dims}, MklDnnType(), - get_desired_format(bwdParams.dst_dims[1]))); + get_desired_format(bwdParams.dst_dims[1], is_2d))); context_.bwd_desc.reset(new pooling_backward::desc( bwdParams.alg_kind, *context_.diff_src_md, *context_.diff_dst_md, bwdParams.strides, bwdParams.filter_dims, bwdParams.padding_left, @@ -151,7 +154,7 @@ void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { // store expected primitive format context_.diff_src_fmt = static_cast( context_.bwd_pd.get()->diff_src_primitive_desc().desc().data.format); - context_.diff_dst_fmt = get_desired_format(bwdParams.dst_dims[1]); + context_.diff_dst_fmt = get_desired_format(bwdParams.dst_dims[1], is_2d); // create MKL-DNN internal memory object with dummy data 
context_.diff_src_mem.reset( @@ -165,7 +168,7 @@ void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { if (bwdParams.alg_kind == pooling_max) { auto ws_pd = context_.fwd_pd.get()->workspace_primitive_desc().desc().data; context_.ws_dims.assign(ws_pd.dims, ws_pd.dims + ws_pd.ndims); - context_.ws_fmt = get_desired_format(context_.ws_dims[1]); + context_.ws_fmt = get_desired_format(context_.ws_dims[1], is_2d); context_.ws_dt = static_cast(ws_pd.data_type); context_.ws_mem.reset(new memory( {{{context_.ws_dims}, context_.ws_dt, context_.ws_fmt}, cpu_engine}, @@ -211,13 +214,22 @@ void MklPoolParameters::Init(OpKernelContext* context, const std::vector& stride, Padding padding, TensorFormat data_format, const TensorShape& tensor_in_shape) { - // For maxpooling, tensor_in should have 4 dimensions. - OP_REQUIRES(context, tensor_in_shape.dims() == 4, - errors::InvalidArgument("tensor_in must be 4-dimensional")); + // For maxpooling, tensor_in should have 4 or 5 dimensions. + OP_REQUIRES(context, + tensor_in_shape.dims() == 4 || tensor_in_shape.dims() == 5, + errors::InvalidArgument("tensor_in must be 4 or 5-dimensional")); depth = GetTensorDim(tensor_in_shape, data_format, 'C'); - tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, 'W'); - tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, 'H'); + if (tensor_in_shape.dims() == 4) { + // Pool2D + tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, 'W'); + tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, 'H'); + } else { + // Pool3D + tensor_in_planes = GetTensorDim(tensor_in_shape, data_format, '0'); + tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, '1'); + tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, '2'); + } tensor_in_batch = GetTensorDim(tensor_in_shape, data_format, 'N'); Init(context, ksize, stride, padding, data_format); @@ -246,10 +258,20 @@ void MklPoolParameters::Init(OpKernelContext* context, TensorFormat data_format, const 
MklDnnShape* mklInputShape) { // Get the input sizes - depth = mklInputShape->GetDimension('C'); - tensor_in_cols = mklInputShape->GetDimension('W'); - tensor_in_rows = mklInputShape->GetDimension('H'); - tensor_in_batch = mklInputShape->GetDimension('N'); + if (ksize.size() == 4) { + // Pool2D + depth = mklInputShape->GetDimension('C'); + tensor_in_cols = mklInputShape->GetDimension('W'); + tensor_in_rows = mklInputShape->GetDimension('H'); + tensor_in_batch = mklInputShape->GetDimension('N'); + } else { + // Pool3D + depth = mklInputShape->GetDimension3D('C'); + tensor_in_cols = mklInputShape->GetDimension3D('W'); + tensor_in_rows = mklInputShape->GetDimension3D('H'); + tensor_in_planes = mklInputShape->GetDimension3D('D'); + tensor_in_batch = mklInputShape->GetDimension3D('N'); + } Init(context, ksize, stride, padding, data_format); } @@ -262,25 +284,58 @@ void MklPoolParameters::Init(OpKernelContext* context, // Get the data format this->data_format = data_format; - // Get the output sizes - window_rows = GetTensorDim(ksize, data_format, 'H'); - window_cols = GetTensorDim(ksize, data_format, 'W'); - depth_window = GetTensorDim(ksize, data_format, 'C'); - - // Get the strides - row_stride = GetTensorDim(stride, data_format, 'H'); - col_stride = GetTensorDim(stride, data_format, 'W'); - depth_stride = GetTensorDim(stride, data_format, 'C'); + bool isPool2D = (ksize.size() == 4); + if (isPool2D) { + // Pool2D + // Get the output sizes + window_rows = GetTensorDim(ksize, data_format, 'H'); + window_cols = GetTensorDim(ksize, data_format, 'W'); + depth_window = GetTensorDim(ksize, data_format, 'C'); + + // Get the strides + row_stride = GetTensorDim(stride, data_format, 'H'); + col_stride = GetTensorDim(stride, data_format, 'W'); + depth_stride = GetTensorDim(stride, data_format, 'C'); + + // We only support 2D pooling across width/height and depthwise + // pooling, not a combination. 
+ OP_REQUIRES(context, + (depth_window == 1 || (window_rows == 1 && window_cols == 1)), + errors::Unimplemented( + "MaxPooling supports exactly one of pooling across depth " + "or pooling across width/height.")); + } else { + // Pool3D + // Get the output sizes + window_planes = GetTensorDim(ksize, data_format, '0'); + window_rows = GetTensorDim(ksize, data_format, '1'); + window_cols = GetTensorDim(ksize, data_format, '2'); + depth_window = GetTensorDim(ksize, data_format, 'C'); + + // Get the strides + planes_stride = GetTensorDim(stride, data_format, '0'); + row_stride = GetTensorDim(stride, data_format, '1'); + col_stride = GetTensorDim(stride, data_format, '2'); + depth_stride = GetTensorDim(stride, data_format, 'C'); + + // We only support 3D pooling across depth/width/height and depthwise + // pooling, not a combination. + OP_REQUIRES(context, + (depth_window == 1 || + (window_rows == 1 && window_cols == 1 && window_planes == 1)), + errors::Unimplemented( + "AvgPooling3D supports exactly one of pooling across depth " + "or pooling across depth/width/height.")); + } - // We only support 2D pooling across width/height and depthwise - // pooling, not a combination. 
- OP_REQUIRES(context, - (depth_window == 1 || (window_rows == 1 && window_cols == 1)), - errors::Unimplemented( - "MaxPooling supports exactly one of pooling across depth " - "or pooling across width/height.")); + if (depth_window == 1) { // we are pooling in the D (Pool3D only), H and W + if (!isPool2D) { + OP_REQUIRES_OK( + context, GetWindowedOutputSizeVerbose(tensor_in_planes, window_planes, + planes_stride, padding, + &out_planes, &pad_P1, &pad_P2)); + } - if (depth_window == 1) { // we are pooling in the H and W OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose( tensor_in_rows, window_rows, row_stride, padding, &out_height, &pad_top, &pad_bottom)); @@ -290,7 +345,14 @@ void MklPoolParameters::Init(OpKernelContext* context, padding, &out_width, &pad_left, &pad_right)); #ifndef INTEL_MKL_ML_ONLY // TF can work with int64, but mkldnn only supports int32 - // Fail if the height or width are greater than MAX_INT + // Fail if the depth, height or width are greater than MAX_INT + // We check depth only for 3D pooling case + + if (!isPool2D) { + OP_REQUIRES(context, + FastBoundsCheck(out_planes, std::numeric_limits::max()), + errors::InvalidArgument("output depth/planes is too large")); + } OP_REQUIRES(context, FastBoundsCheck(out_height, std::numeric_limits::max()), @@ -299,7 +361,6 @@ void MklPoolParameters::Init(OpKernelContext* context, OP_REQUIRES(context, FastBoundsCheck(out_width, std::numeric_limits::max()), errors::InvalidArgument("output width is too large")); - #endif out_depth = depth; // output will have the same depth as the input } else { // we are pooling in the depth dimension diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index ec7af5092d..ea7458062c 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -19,6 +19,7 @@ limitations under the License. 
#ifdef INTEL_MKL #include #include +#include #include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/padding.h" @@ -32,7 +33,7 @@ using mkldnn::stream; namespace tensorflow { -#ifndef INTEL_MKL_ML +#ifndef INTEL_MKL_ML_ONLY using mkldnn::memory; using mkldnn::pooling_avg; @@ -357,22 +358,28 @@ typedef Eigen::ThreadPoolDevice CPUDevice; struct MklPoolParameters { int depth; + int tensor_in_planes; // Pool3D int tensor_in_cols; int tensor_in_rows; int tensor_in_batch; + int window_planes; // Pool3D int window_rows; int window_cols; int depth_window; + int planes_stride; // Pool3D int row_stride; int col_stride; int depth_stride; + int64 out_planes; // Pool3D int64 out_height; int64 out_width; int out_depth; + int64 pad_P1; // Pool3D + int64 pad_P2; // Pool3D int64 pad_left; int64 pad_right; int64 pad_top; @@ -382,18 +389,24 @@ struct MklPoolParameters { TensorFormat data_format; MklPoolParameters() : depth(0), + tensor_in_planes(0), tensor_in_cols(0), tensor_in_rows(0), tensor_in_batch(0), + window_planes(0), window_rows(0), window_cols(0), depth_window(0), + planes_stride(0), row_stride(0), col_stride(0), depth_stride(0), + out_planes(0), out_height(0), out_width(0), out_depth(0), + pad_P1(0), + pad_P2(0), pad_left(0), pad_right(0), pad_top(0), @@ -433,20 +446,22 @@ class MklPoolingOpBase : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &this->data_format_tf_), errors::InvalidArgument("Invalid data format")); - this->data_format_mkldnn_ = - TFDataFormatToMklDnnDataFormat(this->data_format_tf_); OP_REQUIRES_OK(context, context->GetAttr("ksize", &this->ksize_)); - OP_REQUIRES(context, this->ksize_.size() == 4, + OP_REQUIRES(context, this->ksize_.size() == 4 || this->ksize_.size() == 5, errors::InvalidArgument("Sliding window ksize field must " - "specify 4 dimensions")); + "specify 4 or 5 dimensions")); OP_REQUIRES_OK(context, context->GetAttr("strides", 
&this->stride_)); - OP_REQUIRES(context, this->stride_.size() == 4, + OP_REQUIRES(context, this->stride_.size() == 4 || this->stride_.size() == 5, errors::InvalidArgument("Sliding window strides field must " - "specify 4 dimensions")); + "specify 4 or 5 dimensions")); OP_REQUIRES_OK(context, context->GetAttr("padding", &this->padding_)); OP_REQUIRES(context, this->ksize_[0] == 1 && this->stride_[0] == 1, errors::Unimplemented("Pooling is not yet supported on the " "batch dimension.")); + bool isPool2D = (this->ksize_.size() == 4); + this->data_format_mkldnn_ = + isPool2D ? TFDataFormatToMklDnnDataFormat(this->data_format_tf_) + : TFDataFormatToMklDnn3DDataFormat(this->data_format_tf_); // We may not get this attribute for this node if it does not go through // graph rewrite pass. So we do not check for error while retrieving this @@ -457,17 +472,26 @@ class MklPoolingOpBase : public OpKernel { protected: // Calculate output shape of pooling op in MKL-DNN and TensorFlow order. - // MKL-DNN uses NCHW for output order. But TensorFlow output will be in - // NHWC or NCHW format depending on data format. Function expects - // output height and output width to have already been int32 - // bounds-checked + // MKL-DNN uses NCHW(Pool2D) or NCDHW(Pool3D) for output order. + // But TensorFlow output will be in NHWC/NCHW(Pool2D) or + // NDHWC/NCDHW(Pool3D) format depending on data format. Function expects + // output height and width to have already been int32 bounds-checked. void GetOutputDims(const MklPoolParameters& mkl_pool_params, memory::dims* output_dims_mkl_order) { - // MKL-DNN always needs output in NCHW format. - *output_dims_mkl_order = {mkl_pool_params.tensor_in_batch, - mkl_pool_params.out_depth, - static_cast(mkl_pool_params.out_height), - static_cast(mkl_pool_params.out_width)}; + if (this->ksize_.size() == 4) { + // Pooling2D: MKL-DNN always needs output in NCHW format. 
+ *output_dims_mkl_order = {mkl_pool_params.tensor_in_batch, + mkl_pool_params.out_depth, + static_cast(mkl_pool_params.out_height), + static_cast(mkl_pool_params.out_width)}; + } else { + // Pooling3D: MKL-DNN always needs output in NCDHW format. + *output_dims_mkl_order = {mkl_pool_params.tensor_in_batch, + mkl_pool_params.out_depth, + static_cast(mkl_pool_params.out_planes), + static_cast(mkl_pool_params.out_height), + static_cast(mkl_pool_params.out_width)}; + } } void InitMklPoolParameters(OpKernelContext* context, @@ -485,14 +509,34 @@ class MklPoolingOpBase : public OpKernel { void PoolParamsToDims(const MklPoolParameters* pool_params, memory::dims* filter_dims, memory::dims* strides, - memory::dims* padding_left, - memory::dims* padding_right) { - *filter_dims = {pool_params->window_rows, pool_params->window_cols}; - *strides = {pool_params->row_stride, pool_params->col_stride}; - *padding_left = {static_cast(pool_params->pad_top), - static_cast(pool_params->pad_left)}; - *padding_right = {static_cast(pool_params->pad_bottom), - static_cast(pool_params->pad_right)}; + memory::dims* padding_left, memory::dims* padding_right, + bool isPool2D) { + if (isPool2D) { + // Pool2D + *filter_dims = + memory::dims({pool_params->window_rows, pool_params->window_cols}); + *strides = + memory::dims({pool_params->row_stride, pool_params->col_stride}); + *padding_left = memory::dims({static_cast(pool_params->pad_top), + static_cast(pool_params->pad_left)}); + *padding_right = memory::dims({static_cast(pool_params->pad_bottom), + static_cast(pool_params->pad_right)}); + } else { + // Pool3D + *filter_dims = + memory::dims({pool_params->window_planes, pool_params->window_rows, + pool_params->window_cols}); + *strides = + memory::dims({pool_params->planes_stride, pool_params->row_stride, + pool_params->col_stride}); + + *padding_left = memory::dims({static_cast(pool_params->pad_P1), + static_cast(pool_params->pad_top), + static_cast(pool_params->pad_left)}); + *padding_right 
= memory::dims({static_cast(pool_params->pad_P2), + static_cast(pool_params->pad_bottom), + static_cast(pool_params->pad_right)}); + } } void AllocateEmptyOutputTensor(OpKernelContext* context, @@ -556,12 +600,27 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase { TensorShape input_tensor_shape = input_tensor.shape(); if (input_tensor.NumElements() != 0) { memory::desc input_md = - input_mkl_shape.IsMklTensor() - ? input_mkl_shape.GetMklLayout() - : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, + input_mkl_shape.IsMklTensor() + ? input_mkl_shape.GetMklLayout() + : memory::desc( + (this->ksize_.size() == 4) + ? TFShapeToMklDnnDimsInNCHW(input_tensor_shape, + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(input_tensor_shape, this->data_format_tf_), - MklDnnType(), this->data_format_mkldnn_); + MklDnnType(), this->data_format_mkldnn_); dnn_data_input->SetUsrMem(input_md, &input_tensor); + + if (this->ksize_.size() == 5) { + // Pool3D + std::vector mkldnn_sizes(5, -1); + mkldnn_sizes[MklDnnDims3D::Dim3d_N] = input_md.data.dims[0]; + mkldnn_sizes[MklDnnDims3D::Dim3d_C] = input_md.data.dims[1]; + mkldnn_sizes[MklDnnDims3D::Dim3d_D] = input_md.data.dims[2]; + mkldnn_sizes[MklDnnDims3D::Dim3d_H] = input_md.data.dims[3]; + mkldnn_sizes[MklDnnDims3D::Dim3d_W] = input_md.data.dims[4]; + dnn_data_input->SetOpMemDesc(mkldnn_sizes, this->data_format_mkldnn_); + } } this->InitMklPoolParameters(context, pool_params, input_mkl_shape, input_tensor_shape); @@ -593,12 +652,13 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase { void SanityCheckInput(OpKernelContext* context, const Tensor& input_tensor, const MklDnnShape& input_mkl_shape) { if (!input_mkl_shape.IsMklTensor()) { - OP_REQUIRES(context, input_tensor.dims() == 4, - errors::InvalidArgument("Input must be 4-dimensional")); + OP_REQUIRES(context, input_tensor.dims() == 4 || input_tensor.dims() == 5, + errors::InvalidArgument("Input must be 4 or 5-dimensional")); } else { - 
OP_REQUIRES(context, input_mkl_shape.GetDimension() == 4, + OP_REQUIRES(context, input_mkl_shape.GetDimension() == 4 || + input_mkl_shape.GetDimension() == 5, errors::InvalidArgument("Input shape must be " - "4-dimensional")); + "4 or 5-dimensional")); } } // .Input("value: T") @@ -649,8 +709,12 @@ class MklPoolingBackwardOpBase : public MklPoolingOpBase { input_gradient_mkl_shape.IsMklTensor() ? input_gradient_mkl_shape.GetMklLayout() : memory::desc( - TFShapeToMklDnnDimsInNCHW(input_gradient_tensor.shape(), - this->data_format_tf_), + (this->ksize_.size() == 4) + ? TFShapeToMklDnnDimsInNCHW(input_gradient_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW( + input_gradient_tensor.shape(), + this->data_format_tf_), MklDnnType(), this->data_format_mkldnn_); input_gradient_dnn_data->SetUsrMem(original_input_grad_md, diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index e0f25fb4ef..a01413f2a7 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1943,6 +1943,104 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is expected to invoke these operators. )doc"); +REGISTER_OP("_MklAvgPool3D") + .Input("value: T") + .Input("mkl_input: uint8") + .Output("output: T") + .Output("mkl_output: uint8") + .Attr("ksize: list(int) >= 5") + .Attr("strides: list(int) >= 5") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("T: {float, half, double}") + .SetShapeFn(shape_inference::Pool3DShape) + .Doc(R"doc( +MKL version of AvgPool3D operator. Uses MKL DNN APIs to perform average pooling +on the input. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. 
+)doc"); + + +REGISTER_OP("_MklAvgPool3DGrad") + .Input("orig_input_shape: int32") + .Input("grad: T") + .Input("mkl_orig_input: uint8") + .Input("mkl_grad: uint8") + .Output("output: T") + .Output("mkl_output: uint8") + .Attr("ksize: list(int) >= 5") + .Attr("strides: list(int) >= 5") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("T: {float, half, double}") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 5, &s)); + c->set_output(0, s); + return Status::OK(); + }) + .Doc(R"doc( +MKL version of AvgPool3DGrad operator. Uses MKL DNN APIs to compute gradients +of AvgPool function. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. +)doc"); + +REGISTER_OP("_MklMaxPool3D") + .Input("input: T") + .Input("mkl_input: uint8") + .Output("output: T") + .Output("workspace: uint8") + .Output("mkl_output: uint8") + .Output("mkl_workspace: uint8") + .Attr("ksize: list(int) >= 5") + .Attr("strides: list(int) >= 5") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("T: {half, bfloat16, float}") + .Attr("workspace_enabled: bool = false") + .SetShapeFn(shape_inference::Pool3DShape) + .Doc(R"doc( +MKL version of MaxPool3D operator. Uses MKL DNN APIs to perform max pooling +on the input. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. 
+)doc"); + +REGISTER_OP("_MklMaxPool3DGrad") + .Input("orig_input: TInput") + .Input("orig_output: TInput") + .Input("grad: T") + .Input("workspace: uint8") + .Input("mkl_orig_input: uint8") + .Input("mkl_orig_output: uint8") + .Input("mkl_grad: uint8") + .Input("mkl_workspace: uint8") + .Output("output: T") + .Output("mkl_output: uint8") + .Attr("ksize: list(int) >= 5") + .Attr("strides: list(int) >= 5") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("T: {half, bfloat16, float} = DT_FLOAT") + .Attr("TInput: {half, bfloat16, float} = DT_FLOAT") + .Attr("workspace_enabled: bool = false") + .SetShapeFn([](InferenceContext* c) { + return UnchangedShapeWithRank(c, 5); + }) + .Doc(R"doc( +MKL version of MaxPool3DGrad operator. Uses MKL DNN APIs to compute gradients +of MaxPool3D function. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. +)doc"); + REGISTER_OP("_MklLRN") .Input("input: T") .Input("mkl_input: uint8") diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 159a787d05..79fc7500fc 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -66,7 +66,6 @@ using mkldnn::reorder; typedef unsigned int uint; #endif - namespace tensorflow { // The file contains a number of utility classes and functions used by MKL @@ -87,6 +86,17 @@ typedef enum { Dim_I = 1 } MklDnnDims; +typedef enum { + Dim3d_N = 0, + Dim3d_C = 1, + Dim3d_D = 2, + Dim3d_H = 3, + Dim3d_W = 4, + Dim3d_O = 0, + Dim3d_I = 1 +} MklDnnDims3D; + + #ifdef INTEL_MKL_ML_ONLY class MklShape { public: @@ -453,6 +463,14 @@ class MklDnnShape { return this->DimSize(index); } + inline size_t GetDimension3D(char dimension) const { + int index = GetMklDnnTensor3DDimIndex(dimension); + CHECK(index >= 0 && index < this->GetDimension()) + << "Invalid index from the dimension: " << index << ", " << dimension; + return this->DimSize(index); + } + + inline int32 
GetMklDnnTensorDimIndex(char dimension) const { switch (dimension) { case 'N': @@ -469,6 +487,24 @@ class MklDnnShape { } } + inline int32 GetMklDnnTensor3DDimIndex(char dimension) const { + switch (dimension) { + case 'N': + return MklDnnDims3D::Dim3d_N; + case 'C': + return MklDnnDims3D::Dim3d_C; + case 'D': + return MklDnnDims3D::Dim3d_D; + case 'H': + return MklDnnDims3D::Dim3d_H; + case 'W': + return MklDnnDims3D::Dim3d_W; + default: + LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value + } + } + inline size_t GetDimension() const { return data_.dimension_; } inline const int* GetSizes() const { return reinterpret_cast(&data_.sizes_[0]); @@ -587,15 +623,29 @@ class MklDnnShape { } inline void SetTfDimOrder(const size_t dimension, TensorFormat data_format) { - // TODO(nhasabni): Why do we restrict this to 4D? - CHECK_EQ(dimension, 4); - CHECK(dimension == data_.dimension_); - data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W; - data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H; - data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C; - data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N; + if (dimension == 5) { + CHECK(dimension == data_.dimension_); + data_.map_[GetTensorDimIndex<3>(data_format, '0')] = + MklDnnDims3D::Dim3d_D; + data_.map_[GetTensorDimIndex<3>(data_format, '1')] = + MklDnnDims3D::Dim3d_H; + data_.map_[GetTensorDimIndex<3>(data_format, '2')] = + MklDnnDims3D::Dim3d_W; + data_.map_[GetTensorDimIndex<3>(data_format, 'C')] = + MklDnnDims3D::Dim3d_C; + data_.map_[GetTensorDimIndex<3>(data_format, 'N')] = + MklDnnDims3D::Dim3d_N; + } else { + CHECK_EQ(dimension, 4); + CHECK(dimension == data_.dimension_); + data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W; + data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H; + data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = 
MklDnnDims::Dim_C; + data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N; + } } + inline void SetTfDimOrder(const size_t dimension, memory::format format) { TensorFormat data_format = MklDnnDataFormatToTFDataFormat(format); SetTfDimOrder(dimension, data_format); @@ -1329,6 +1379,19 @@ memory::data_type MklDnnType() { return memory::data_type::f32; } +/// Map TensorFlow's data format into MKL-DNN 3D data format +/// @input: TensorFlow data format +/// @return: memory::format corresponding to TensorFlow data format; +/// Fails with an error if invalid data format. +inline memory::format TFDataFormatToMklDnn3DDataFormat(TensorFormat format) { + if (format == FORMAT_NHWC) + return memory::format::ndhwc; + else if (format == FORMAT_NCHW) + return memory::format::ncdhw; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); + return memory::format::format_undef; +} + /// Map TensorFlow's data format into MKL-DNN data format /// /// @input: TensorFlow data format @@ -1350,9 +1413,9 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { /// @return: Tensorflow data format corresponding to memory::format /// Fails with an error if invalid data format. inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { - if (format == memory::format::nhwc) + if (format == memory::format::nhwc || format == memory::format::ndhwc) return FORMAT_NHWC; - else if (format == memory::format::nchw) + else if (format == memory::format::nchw || format == memory::format::ncdhw) return FORMAT_NCHW; TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); @@ -1402,6 +1465,23 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, return memory::dims({n, c, h, w}); } +inline memory::dims TFShapeToMklDnnDimsInNCDHW(const TensorShape& shape, + TensorFormat format) { + // Check validity of format. 
+ CHECK_NE(TFDataFormatToMklDnn3DDataFormat(format), + memory::format::format_undef); + + int n = shape.dim_size(GetTensorDimIndex<3>(format, 'N')); + int c = shape.dim_size(GetTensorDimIndex<3>(format, 'C')); + int d = shape.dim_size(GetTensorDimIndex<3>(format, '0')); + int h = shape.dim_size(GetTensorDimIndex<3>(format, '1')); + int w = shape.dim_size(GetTensorDimIndex<3>(format, '2')); + + // MKL-DNN requires dimensions in NCDHW format. + return memory::dims({n, c, d, h, w}); +} + + /// Overloaded version of function above. Input parameters are /// self-explanatory. inline memory::dims MklDnnDimsInNCHW(const memory::dims& in_dims, @@ -1976,16 +2056,20 @@ class FactoryKeyCreator { } }; -static inline memory::format get_desired_format(int channel) { + +static inline memory::format get_desired_format(int channel, + bool is_2d = true) { memory::format fmt_desired = memory::format::any; - if (port::TestCPUFeature(port::CPUFeature::AVX512F) && (channel % 16) == 0) { - fmt_desired = memory::format::nChw16c; + if (port::TestCPUFeature(port::CPUFeature::AVX512F)) { + fmt_desired = is_2d ? memory::format::nChw16c : memory::format::nCdhw16c; } else if (port::TestCPUFeature(port::CPUFeature::AVX2) && (channel % 8) == 0) { - fmt_desired = memory::format::nChw8c; + fmt_desired = is_2d + ? memory::format::nChw8c + : memory::format::ncdhw; //not support avx2 for 3d yet. } else { - fmt_desired = memory::format::nchw; + fmt_desired = is_2d ? 
memory::format::nchw : memory::format::ncdhw; } return fmt_desired; } -- GitLab From 7b35aac2924b2dbd744ff5db9a24d8b05eb90f58 Mon Sep 17 00:00:00 2001 From: AG Ramesh Date: Mon, 13 Aug 2018 18:00:58 -0700 Subject: [PATCH 033/380] Replaced INTEL_MKL_ML with new macro INTEL_MKL_ML_ONLY --- tensorflow/core/kernels/mkl_relu_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 8db43b2a8d..99f8136f41 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -44,7 +44,7 @@ using mkldnn::memory; namespace tensorflow { -#ifndef INTEL_MKL_ML +#ifndef INTEL_MKL_ML_ONLY template class MklEltwiseFwdParams { -- GitLab From 285273717d17c0609c49e020b4cc9220913d3558 Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 13 Aug 2018 21:50:54 -0400 Subject: [PATCH 034/380] py37 --- tensorflow/c/eager/c_api.cc | 8 +++--- tensorflow/c/eager/c_api.h | 4 +-- tensorflow/python/eager/pywrap_tfe_src.cc | 4 +-- tensorflow/workspace.bzl | 34 +++++++++++++++-------- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index dfb1c9a376..ce5a3f29a4 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -244,8 +244,8 @@ void TFE_ContextOptionsSetConfig(TFE_ContextOptions* options, const void* proto, } void TFE_ContextOptionsSetAsync(TFE_ContextOptions* options, - unsigned char async) { - options->async = async; + unsigned char async_) { + options->async = async_; } void TFE_ContextOptionsSetDevicePlacementPolicy( TFE_ContextOptions* options, TFE_ContextDevicePlacementPolicy policy) { @@ -253,9 +253,9 @@ void TFE_ContextOptionsSetDevicePlacementPolicy( } TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx, - unsigned char async, + unsigned char async_, TF_Status* status) { - status->status = ctx->context.SetAsyncForThread(async); + status->status = 
ctx->context.SetAsyncForThread(async_); } void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index a0ebc6fa0a..db0079b0de 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -76,7 +76,7 @@ typedef enum TFE_ContextDevicePlacementPolicy { // Sets the default execution mode (sync/async). Note that this can be // overridden per thread using TFE_ContextSetAsyncForThread. TF_CAPI_EXPORT extern void TFE_ContextOptionsSetAsync(TFE_ContextOptions*, - unsigned char async); + unsigned char async_); TF_CAPI_EXPORT extern void TFE_ContextOptionsSetDevicePlacementPolicy( TFE_ContextOptions*, TFE_ContextDevicePlacementPolicy); @@ -114,7 +114,7 @@ TFE_ContextGetDevicePlacementPolicy(TFE_Context*); // Overrides the execution mode (sync/async) for the current thread. TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context*, - unsigned char async, + unsigned char async_, TF_Status* status); // A tensorflow.ServerDef specifies remote workers (in addition to the current diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 2d54555cd3..64cf36d079 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -216,7 +216,7 @@ bool ParseStringValue(const string& key, PyObject* py_value, TF_Status* status, #if PY_MAJOR_VERSION >= 3 if (PyUnicode_Check(py_value)) { Py_ssize_t size = 0; - char* buf = PyUnicode_AsUTF8AndSize(py_value, &size); + const char* buf = PyUnicode_AsUTF8AndSize(py_value, &size); if (buf == nullptr) return false; *value = tensorflow::StringPiece(buf, size); return true; @@ -825,7 +825,7 @@ int MaybeRaiseExceptionFromStatus(const tensorflow::Status& status, return -1; } -char* TFE_GetPythonString(PyObject* o) { +const char* TFE_GetPythonString(PyObject* o) { if (PyBytes_Check(o)) { return PyBytes_AsString(o); } diff --git 
a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index c21e5ebc9e..7138c0a452 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -362,11 +362,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "protobuf_archive", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz", - "https://github.com/google/protobuf/archive/v3.6.0.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", + "https://github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", ], - sha256 = "50a5753995b3142627ac55cfd496cebc418a2e575ca0236e29033c67bd5665f4", - strip_prefix = "protobuf-3.6.0", + sha256 = "4bb48bcc972ee9d40a8bb7e481522030b6e6771a7283ae83c896872115180d25", + strip_prefix = "protobuf-0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7", ) # We need to import the protobuf library under the names com_google_protobuf @@ -375,21 +375,31 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_google_protobuf", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz", - "https://github.com/google/protobuf/archive/v3.6.0.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", + "https://github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", ], - sha256 = "50a5753995b3142627ac55cfd496cebc418a2e575ca0236e29033c67bd5665f4", - strip_prefix = "protobuf-3.6.0", + sha256 = "4bb48bcc972ee9d40a8bb7e481522030b6e6771a7283ae83c896872115180d25", + strip_prefix = "protobuf-0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7", ) tf_http_archive( name = "com_google_protobuf_cc", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz", - "https://github.com/google/protobuf/archive/v3.6.0.tar.gz", + 
"https://mirror.bazel.build/github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", + "https://github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", ], - sha256 = "50a5753995b3142627ac55cfd496cebc418a2e575ca0236e29033c67bd5665f4", - strip_prefix = "protobuf-3.6.0", + sha256 = "4bb48bcc972ee9d40a8bb7e481522030b6e6771a7283ae83c896872115180d25", + strip_prefix = "protobuf-0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7", + ) + + tf_http_archive( + name = "bazel_skylib", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/archive/2169ae1c374aab4a09aa90e65efe1a3aad4e279b.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/archive/2169ae1c374aab4a09aa90e65efe1a3aad4e279b.tar.gz" + ], + sha256 = "bbccf674aa441c266df9894182d80de104cabd19be98be002f6d478aaa31574d", + strip_prefix = "bazel-skylib-2169ae1c374aab4a09aa90e65efe1a3aad4e279b", ) tf_http_archive( -- GitLab From 4aaab50552a3cdb4b785653f071ae6c7193992ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 14 Aug 2018 12:25:18 +0800 Subject: [PATCH 035/380] CLN: fix coding style --- tensorflow/python/ops/array_grad.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 328b4f7d53..2beb58d534 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -763,9 +763,10 @@ def _ExtractImagePatchesGrad(op, grad): (1, rows_out, cols_out, ksize_r * ksize_c)) # Construct mapping table for indices: (input -> output). 
- idx_matrix = array_ops.concat([array_ops.expand_dims(input_idx_patched, axis=-1), - array_ops.expand_dims(output_idx, axis=-1)], - axis=-1) + idx_matrix = array_ops.concat( + [array_ops.expand_dims(input_idx_patched, axis=-1), + array_ops.expand_dims(output_idx, axis=-1)], + axis=-1) idx_map = array_ops.reshape(idx_matrix, (-1, 2)) sp_shape = (input_indices_num, output_indices_num) -- GitLab From f982cfe9f943c9920cafeefff7818ea298d5b509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 14 Aug 2018 12:41:36 +0800 Subject: [PATCH 036/380] TST: add benchmark --- .../extract_image_patches_grad_test.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py b/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py index 60090a1510..e1f5a6b620 100644 --- a/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py +++ b/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py @@ -25,6 +25,8 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed as random_seed_lib from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test @@ -100,6 +102,24 @@ class ExtractImagePatchesGradTest(test.TestCase): print('extract_image_patches gradient err: %.4e' % err) self.assertLess(err, 1e-4) + def testConstructGradientWithLargeImages(self): + batch_size = 4 + height = 1024 + width = 1024 + ksize = 5 + images = variable_scope.get_variable('inputs', + (batch_size, height, width, 1)) + patches = array_ops.extract_image_patches(images, + ksizes=[1, ksize, ksize, 1], + strides=[1, 1, 1, 1], + rates=[1, 1, 1, 1], + padding='SAME') + # Github issue: #20146 + # tf.extract_image_patches() gradient very slow at graph 
construction time + gradients = gradients_impl.gradients(patches, images) + # Won't time out. + self.assertIsNotNone(gradients) + if __name__ == '__main__': test.main() -- GitLab From 26e7d51fee4ecfaeffbfad7beaf6952b3132b444 Mon Sep 17 00:00:00 2001 From: bstriner Date: Tue, 14 Aug 2018 04:48:11 -0400 Subject: [PATCH 037/380] py37 --- tensorflow/c/eager/c_api.cc | 8 ++++---- tensorflow/python/eager/pywrap_tfe.h | 2 +- tensorflow/python/pywrap_tfe.i | 6 +++--- tensorflow/workspace.bzl | 24 ++++++++++++------------ 4 files changed, 20 insertions(+), 20 deletions(-) mode change 100644 => 100755 tensorflow/c/eager/c_api.cc mode change 100644 => 100755 tensorflow/python/eager/pywrap_tfe.h mode change 100644 => 100755 tensorflow/python/pywrap_tfe.i mode change 100644 => 100755 tensorflow/workspace.bzl diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc old mode 100644 new mode 100755 index ce5a3f29a4..1ccae3f138 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -244,8 +244,8 @@ void TFE_ContextOptionsSetConfig(TFE_ContextOptions* options, const void* proto, } void TFE_ContextOptionsSetAsync(TFE_ContextOptions* options, - unsigned char async_) { - options->async = async_; + unsigned char enable) { + options->async = enable; } void TFE_ContextOptionsSetDevicePlacementPolicy( TFE_ContextOptions* options, TFE_ContextDevicePlacementPolicy policy) { @@ -253,9 +253,9 @@ void TFE_ContextOptionsSetDevicePlacementPolicy( } TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx, - unsigned char async_, + unsigned char enable, TF_Status* status) { - status->status = ctx->context.SetAsyncForThread(async_); + status->status = ctx->context.SetAsyncForThread(enable); } void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h old mode 100644 new mode 100755 index a916a75f00..823c4078b8 --- 
a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -89,7 +89,7 @@ int MaybeRaiseExceptionFromStatus(const tensorflow::Status& status, PyObject* exception); // Returns the string associated with the passed-in python object. -char* TFE_GetPythonString(PyObject* o); +const char* TFE_GetPythonString(PyObject* o); // Returns a unique id on each call. int64_t get_uid(); diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i old mode 100644 new mode 100755 index 157f2341e0..bc02e9a35c --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -106,19 +106,19 @@ limitations under the License. } %typemap(in) const char* serialized_function_def { - $1 = TFE_GetPythonString($input); + $1 = const_cast(TFE_GetPythonString($input)); } %typemap(in) const char* device_name { if ($input == Py_None) { $1 = nullptr; } else { - $1 = TFE_GetPythonString($input); + $1 = const_cast(TFE_GetPythonString($input)); } } %typemap(in) const char* op_name { - $1 = TFE_GetPythonString($input); + $1 = const_cast(TFE_GetPythonString($input)); } %typemap(in) (TFE_Context*) { diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl old mode 100644 new mode 100755 index 7138c0a452..769e74d5a5 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -362,11 +362,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "protobuf_archive", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", - "https://github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", + "https://github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", ], - sha256 = "4bb48bcc972ee9d40a8bb7e481522030b6e6771a7283ae83c896872115180d25", - strip_prefix = 
"protobuf-0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7", + sha256 = "b91b0ac9907af983877c960809dcad7a6dc8e4b06e34d32b7d66a12b9ea1fa17", + strip_prefix = "protobuf-fe2eef4bf414ebb352cf11bcec633f1fd46ec876", ) # We need to import the protobuf library under the names com_google_protobuf @@ -375,21 +375,21 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_google_protobuf", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", - "https://github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", + "https://github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", ], - sha256 = "4bb48bcc972ee9d40a8bb7e481522030b6e6771a7283ae83c896872115180d25", - strip_prefix = "protobuf-0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7", + sha256 = "b91b0ac9907af983877c960809dcad7a6dc8e4b06e34d32b7d66a12b9ea1fa17", + strip_prefix = "protobuf-fe2eef4bf414ebb352cf11bcec633f1fd46ec876", ) tf_http_archive( name = "com_google_protobuf_cc", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", - "https://github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", + "https://github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", ], - sha256 = "4bb48bcc972ee9d40a8bb7e481522030b6e6771a7283ae83c896872115180d25", - strip_prefix = "protobuf-0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7", + sha256 = "b91b0ac9907af983877c960809dcad7a6dc8e4b06e34d32b7d66a12b9ea1fa17", + strip_prefix = "protobuf-fe2eef4bf414ebb352cf11bcec633f1fd46ec876", ) tf_http_archive( -- GitLab From 60ea4be9ac3bdbee55dee9b011b151971dfae5ad Mon 
Sep 17 00:00:00 2001 From: bstriner Date: Tue, 14 Aug 2018 05:18:43 -0400 Subject: [PATCH 038/380] rename enable --- tensorflow/c/eager/c_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) mode change 100644 => 100755 tensorflow/c/eager/c_api.h diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h old mode 100644 new mode 100755 index db0079b0de..eec2750d6e --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -76,7 +76,7 @@ typedef enum TFE_ContextDevicePlacementPolicy { // Sets the default execution mode (sync/async). Note that this can be // overridden per thread using TFE_ContextSetAsyncForThread. TF_CAPI_EXPORT extern void TFE_ContextOptionsSetAsync(TFE_ContextOptions*, - unsigned char async_); + unsigned char enable); TF_CAPI_EXPORT extern void TFE_ContextOptionsSetDevicePlacementPolicy( TFE_ContextOptions*, TFE_ContextDevicePlacementPolicy); @@ -114,7 +114,7 @@ TFE_ContextGetDevicePlacementPolicy(TFE_Context*); // Overrides the execution mode (sync/async) for the current thread. 
TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context*, - unsigned char async_, + unsigned char enable, TF_Status* status); // A tensorflow.ServerDef specifies remote workers (in addition to the current -- GitLab From c521738635ed5c50e31be2e87305e49c7dfeb601 Mon Sep 17 00:00:00 2001 From: bstriner Date: Tue, 14 Aug 2018 05:31:58 -0400 Subject: [PATCH 039/380] workspace --- tensorflow/workspace.bzl | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 67beb17978..2cf1c86395 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -359,14 +359,18 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): }, ) + PROTOBUF_urls =[ + "https://mirror.bazel.build/github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", + "https://github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", + ] + PROTOBUF_sha256 = "b91b0ac9907af983877c960809dcad7a6dc8e4b06e34d32b7d66a12b9ea1fa17" + PROTOBUF_strip_prefix = "protobuf-fe2eef4bf414ebb352cf11bcec633f1fd46ec876" + tf_http_archive( name = "protobuf_archive", - urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", - "https://github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", - ], - sha256 = "b91b0ac9907af983877c960809dcad7a6dc8e4b06e34d32b7d66a12b9ea1fa17", - strip_prefix = "protobuf-fe2eef4bf414ebb352cf11bcec633f1fd46ec876", + urls = PROTOBUF_urls, + sha256 = PROTOBUF_sha256, + strip_prefix = PROTOBUF_strip_prefix, ) # We need to import the protobuf library under the names com_google_protobuf @@ -374,22 +378,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): # Unfortunately there is no way to alias http_archives at the moment. 
tf_http_archive( name = "com_google_protobuf", - urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", - "https://github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", - ], - sha256 = "b91b0ac9907af983877c960809dcad7a6dc8e4b06e34d32b7d66a12b9ea1fa17", - strip_prefix = "protobuf-fe2eef4bf414ebb352cf11bcec633f1fd46ec876", + urls = PROTOBUF_urls, + sha256 = PROTOBUF_sha256, + strip_prefix = PROTOBUF_strip_prefix, ) tf_http_archive( name = "com_google_protobuf_cc", - urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", - "https://github.com/google/protobuf/archive/fe2eef4bf414ebb352cf11bcec633f1fd46ec876.tar.gz", - ], - sha256 = "b91b0ac9907af983877c960809dcad7a6dc8e4b06e34d32b7d66a12b9ea1fa17", - strip_prefix = "protobuf-fe2eef4bf414ebb352cf11bcec633f1fd46ec876", + urls = PROTOBUF_urls, + sha256 = PROTOBUF_sha256, + strip_prefix = PROTOBUF_strip_prefix, ) tf_http_archive( -- GitLab From c2687096e60f443d445c8871ab54ce095137018e Mon Sep 17 00:00:00 2001 From: "karl@kubx.ca" Date: Tue, 7 Aug 2018 22:40:25 -0400 Subject: [PATCH 040/380] Render secondary factory for default output types --- tensorflow/java/src/gen/cc/java_defs.h | 30 +++++++++ tensorflow/java/src/gen/cc/op_generator.cc | 74 +++++++++++++++++++++ tensorflow/java/src/gen/cc/op_specs.cc | 45 ++----------- tensorflow/java/src/gen/cc/op_specs.h | 11 +-- tensorflow/java/src/gen/cc/source_writer.cc | 1 - 5 files changed, 116 insertions(+), 45 deletions(-) diff --git a/tensorflow/java/src/gen/cc/java_defs.h b/tensorflow/java/src/gen/cc/java_defs.h index d9d6f8adc8..d39653ef41 100644 --- a/tensorflow/java/src/gen/cc/java_defs.h +++ b/tensorflow/java/src/gen/cc/java_defs.h @@ -21,6 +21,8 @@ limitations under the License. 
#include #include +#include "tensorflow/core/framework/types.h" + namespace tensorflow { namespace java { @@ -95,6 +97,34 @@ class Type { static Type IterableOf(const Type& type) { return Interface("Iterable").add_parameter(type); } + static Type ForDataType(DataType data_type) { + switch (data_type) { + case DataType::DT_BOOL: + return Class("Boolean"); + case DataType::DT_STRING: + return Class("String"); + case DataType::DT_FLOAT: + return Class("Float"); + case DataType::DT_DOUBLE: + return Class("Double"); + case DataType::DT_UINT8: + return Class("UInt8", "org.tensorflow.types"); + case DataType::DT_INT32: + return Class("Integer"); + case DataType::DT_INT64: + return Class("Long"); + case DataType::DT_RESOURCE: + // TODO(karllessard) create a Resource utility class that could be + // used to store a resource and its type (passed in a second argument). + // For now, we need to force a wildcard and we will unfortunately lose + // track of the resource type. + // Falling through... + default: + // Any other datatypes does not have a equivalent in Java and must + // remain a wildcard (e.g. DT_COMPLEX64, DT_QINT8, ...) + return Wildcard(); + } + } const Kind& kind() const { return kind_; } const string& name() const { return name_; } const string& package() const { return package_; } diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc index d5bd99bdd9..8587d4dc30 100644 --- a/tensorflow/java/src/gen/cc/op_generator.cc +++ b/tensorflow/java/src/gen/cc/op_generator.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include #include #include +#include #include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/lib/core/errors.h" @@ -100,6 +101,10 @@ void CollectOpDependencies(const OpSpec& op, RenderMode mode, for (const AttributeSpec& attribute : op.attributes()) { out->push_back(attribute.var().type()); out->push_back(attribute.jni_type()); + if (attribute.has_default_value() + && attribute.type().kind() == Type::GENERIC) { + out->push_back(Type::ForDataType(attribute.default_value()->type())); + } } for (const AttributeSpec& optional_attribute : op.optional_attributes()) { out->push_back(optional_attribute.var().type()); @@ -139,6 +144,60 @@ void WriteSetAttrDirective(const AttributeSpec& attr, bool optional, } } +void RenderSecondaryFactoryMethod(const OpSpec& op, const Type& op_class, + std::map default_types, + SourceWriter* writer) { + // Build the return type for the secondary factory, replacing generic + // parameters with their default value if any + Type return_type = Type::Class(op_class.name(), op_class.package()); + for (const Type& parameter : op_class.parameters()) { + if (parameter.kind() == Type::GENERIC + && default_types.find(parameter.name()) != default_types.end()) { + return_type.add_parameter(default_types.at(parameter.name())); + } else { + return_type.add_parameter(parameter); + } + } + Method factory = Method::Create("create", return_type); + Javadoc factory_doc = + Javadoc::Create("Factory method to create a class to wrap a new " + + op_class.name() + " operation to the graph, using " + "default output types."); + Variable scope = + Variable::Create("scope", Type::Class("Scope", "org.tensorflow.op")); + AddArgument(scope, "current graph scope", &factory, &factory_doc); + std::stringstream factory_statement; + factory_statement << "return create(scope"; + for (const ArgumentSpec& input : op.inputs()) { + AddArgument(input.var(), input.description(), &factory, &factory_doc); + factory_statement << ", " << input.var().name(); + } + for 
(const AttributeSpec& attr : op.attributes()) { + // Only add attributes that are not types or have no default value to the + // signature of the secondary factory + factory_statement << ", "; + if (attr.type().kind() == Type::GENERIC + && default_types.find(attr.type().name()) != default_types.end()) { + factory_statement << default_types.at(attr.type().name()).name() + << ".class"; + } else { + AddArgument(attr.var(), attr.description(), &factory, &factory_doc); + factory_statement << attr.var().name(); + } + } + if (!op.optional_attributes().empty()) { + Variable options_var = Variable::Varargs("options", Type::Class("Options")); + AddArgument(options_var, "carries optional attributes values", &factory, + &factory_doc); + factory_statement << ", " << options_var.name(); + } + factory_doc.add_tag("return", "a new instance of " + op_class.name()); + + writer->BeginMethod(factory, PUBLIC | STATIC, &factory_doc); + writer->Append(factory_statement.str()).Append(");").EndLine(); + writer->EndMethod(); +} + void RenderFactoryMethods(const OpSpec& op, const Type& op_class, SourceWriter* writer) { Method factory = Method::Create("create", op_class); @@ -151,8 +210,17 @@ void RenderFactoryMethods(const OpSpec& op, const Type& op_class, for (const ArgumentSpec& input : op.inputs()) { AddArgument(input.var(), input.description(), &factory, &factory_doc); } + std::map default_types; for (const AttributeSpec& attr : op.attributes()) { AddArgument(attr.var(), attr.description(), &factory, &factory_doc); + // If this attribute is a type with a default value, save its value + // for passing it implicitly in a secondary factory method + if (attr.has_default_value() && attr.type().kind() == Type::GENERIC) { + Type default_type = Type::ForDataType(attr.default_value()->type()); + if (!default_type.wildcard()) { + default_types.insert(std::make_pair(attr.type().name(), default_type)); + } + } } if (!op.optional_attributes().empty()) { AddArgument(Variable::Varargs("options", 
Type::Class("Options")), @@ -194,6 +262,12 @@ void RenderFactoryMethods(const OpSpec& op, const Type& op_class, .Append("(opBuilder.build());") .EndLine(); writer->EndMethod(); + + // If this operation has type attributes with a default value, create a + // second factory method that infers those values implicitly + if (!default_types.empty()) { + RenderSecondaryFactoryMethod(op, op_class, default_types, writer); + } } void RenderConstructor(const OpSpec& op, const Type& op_class, diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc index 941ab2699c..b2f2fb18a9 100644 --- a/tensorflow/java/src/gen/cc/op_specs.cc +++ b/tensorflow/java/src/gen/cc/op_specs.cc @@ -96,43 +96,10 @@ Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) { *iterable_out = true; visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); } - Type type = Type::Wildcard(); if (arg_def.type() != DataType::DT_INVALID) { - // resolve type from DataType - switch (arg_def.type()) { - case DataType::DT_BOOL: - type = Type::Class("Boolean"); - break; - case DataType::DT_STRING: - type = Type::Class("String"); - break; - case DataType::DT_FLOAT: - type = Type::Class("Float"); - break; - case DataType::DT_DOUBLE: - type = Type::Class("Double"); - break; - case DataType::DT_UINT8: - type = Type::Class("UInt8", "org.tensorflow.types"); - break; - case DataType::DT_INT32: - type = Type::Class("Integer"); - break; - case DataType::DT_INT64: - type = Type::Class("Long"); - break; - case DataType::DT_RESOURCE: - // TODO(karllessard) create a Resource utility class that could be - // used to store a resource and its type (passed in a second argument). - // For now, we need to force a wildcard and we will unfortunately lose - // track of the resource type. - break; - default: - // Any other datatypes does not have a equivalent in Java and must - // remain a wildcard (e.g. DT_COMPLEX64, DT_QINT8, ...) 
- break; - } + type = Type::ForDataType(arg_def.type()); + } else if (!arg_def.type_attr().empty()) { // resolve type from attribute (if already visited, retrieve its type) if (IsAttributeVisited(arg_def.type_attr())) { @@ -337,16 +304,16 @@ AttributeSpec CreateAttribute(const OpDef_AttrDef& attr_def, bool iterable = false; std::pair types = type_resolver->TypesOf(attr_def, &iterable); Type var_type = types.first.kind() == Type::GENERIC - ? Type::Class("Class").add_parameter(types.first) - : types.first; + ? Type::ClassOf(types.first) : types.first; if (iterable) { var_type = Type::ListOf(var_type); } return AttributeSpec( attr_api_def.name(), Variable::Create(SnakeToCamelCase(attr_api_def.rename_to()), var_type), - types.first, types.second, ParseDocumentation(attr_api_def.description()), - iterable, attr_api_def.has_default_value()); + types.first, types.second, + ParseDocumentation(attr_api_def.description()), iterable, + attr_def.has_default_value() ? &attr_def.default_value() : nullptr); } ArgumentSpec CreateOutput(const OpDef_ArgDef& output_def, diff --git a/tensorflow/java/src/gen/cc/op_specs.h b/tensorflow/java/src/gen/cc/op_specs.h index 30ecb8ce53..7ad19af562 100644 --- a/tensorflow/java/src/gen/cc/op_specs.h +++ b/tensorflow/java/src/gen/cc/op_specs.h @@ -94,18 +94,18 @@ class AttributeSpec { // jni_type: the type of this attribute in JNI layer (see OperationBuilder) // description: a description of this attribute, in javadoc // iterable: true if this attribute is a list - // has_default_value: true if this attribute has a default value if not set + // default_value: default value for this attribute or nullptr if none AttributeSpec(const string& op_def_name, const Variable& var, const Type& type, const Type& jni_type, const string& description, bool iterable, - bool has_default_value) + const AttrValue* default_value) : op_def_name_(op_def_name), var_(var), type_(type), description_(description), iterable_(iterable), jni_type_(jni_type), - 
has_default_value_(has_default_value) {} + default_value_(default_value) {} const string& op_def_name() const { return op_def_name_; } const Variable& var() const { return var_; } @@ -113,7 +113,8 @@ class AttributeSpec { const string& description() const { return description_; } bool iterable() const { return iterable_; } const Type& jni_type() const { return jni_type_; } - bool has_default_value() const { return has_default_value_; } + bool has_default_value() const { return default_value_ != nullptr; } + const AttrValue* default_value() const { return default_value_; } private: const string op_def_name_; @@ -122,7 +123,7 @@ class AttributeSpec { const string description_; const bool iterable_; const Type jni_type_; - const bool has_default_value_; + const AttrValue* default_value_; }; class OpSpec { diff --git a/tensorflow/java/src/gen/cc/source_writer.cc b/tensorflow/java/src/gen/cc/source_writer.cc index 8e5fba7e32..a71b367691 100644 --- a/tensorflow/java/src/gen/cc/source_writer.cc +++ b/tensorflow/java/src/gen/cc/source_writer.cc @@ -16,7 +16,6 @@ limitations under the License. 
#include #include #include -#include #include "tensorflow/java/src/gen/cc/source_writer.h" -- GitLab From 66a6473283aae36889fd80419b407a34c763e1d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 15 Aug 2018 12:55:26 +0800 Subject: [PATCH 041/380] CLN: use ones op --- tensorflow/python/ops/array_grad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 2beb58d534..6ae869b89e 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -772,7 +772,7 @@ def _ExtractImagePatchesGrad(op, grad): sp_shape = (input_indices_num, output_indices_num) sp_mat_full = sparse_tensor.SparseTensor( idx_map, - array_ops.ones_like(idx_map[:, 0], dtype=grad.dtype), + array_ops.ones([output_indices_num], dtype=grad.dtype), sp_shape) # Remove all padding locations [0, :]. sp_mat = sparse_ops.sparse_slice(sp_mat_full, -- GitLab From e233afe9810e7a80b7b5d6e91fd87c5f47f2e72e Mon Sep 17 00:00:00 2001 From: Vitaly Lavrukhin Date: Wed, 15 Aug 2018 08:42:30 -0700 Subject: [PATCH 042/380] Switched to Eigen exp/log --- tensorflow/core/util/ctc/ctc_beam_search.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/util/ctc/ctc_beam_search.h b/tensorflow/core/util/ctc/ctc_beam_search.h index fa58b5cedf..b8390d3650 100644 --- a/tensorflow/core/util/ctc/ctc_beam_search.h +++ b/tensorflow/core/util/ctc/ctc_beam_search.h @@ -263,9 +263,9 @@ void CTCBeamSearchDecoder::Step( // Get normalization term of softmax: log(sum(exp(logit[j]-max_coeff))). float logsumexp = 0.0; for (int j = 0; j < raw_input.size(); ++j) { - logsumexp += expf(raw_input(j) - max_coeff); + logsumexp += Eigen::numext::exp(raw_input(j) - max_coeff); } - logsumexp = logf(logsumexp); + logsumexp = Eigen::numext::log(logsumexp); // Final normalization offset to get correct log probabilities. 
float norm_offset = max_coeff + logsumexp; -- GitLab From 257067f0ad93ef170c73907323f26dce1ab841f2 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrukhin Date: Wed, 15 Aug 2018 08:48:14 -0700 Subject: [PATCH 043/380] Updated keras CTC beam search decoder tests --- tensorflow/python/keras/backend_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index 40e7910061..5dbaaa513d 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -1384,8 +1384,8 @@ class TestCTC(test.TestCase): np.array([seq_len_0], dtype=np.int32)) # batch_size length vector of negative log probabilities log_prob_truth = np.array([ - 0.584855, # output beam 0 - 0.389139 # output beam 1 + -3.5821197, # output beam 0 + -3.777835 # output beam 1 ], np.float32)[np.newaxis, :] decode_truth = [np.array([1, 0]), np.array([0, 1, 0])] -- GitLab From 00ddbca932f40c50aab40489981304ff4ed590e2 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 17 Aug 2018 12:31:01 -0700 Subject: [PATCH 044/380] variable renaming per code review suggestions --- tensorflow/core/kernels/mkl_avgpooling_op.cc | 14 +++++++------- tensorflow/core/kernels/mkl_maxpooling_op.cc | 16 ++++++++-------- .../core/kernels/mkl_pooling_ops_common.cc | 8 ++++---- tensorflow/core/kernels/mkl_pooling_ops_common.h | 8 ++++---- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc index 749b2a1838..2409f7e9dc 100644 --- a/tensorflow/core/kernels/mkl_avgpooling_op.cc +++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc @@ -454,7 +454,7 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase { // initialize variables for the pooling op MklPoolParameters pool_params; // check whether pooling is 2D or 3D - bool isPool2D = (this->ksize_.size() == 4); + bool is_pool2d = (this->ksize_.size() == 4); // Get 
the input tensor and initialize the pooling parameters TensorShape input_tensor_shape = input_tensor.shape(); this->InitMklPoolParameters(context, &pool_params, dnn_shape_input, @@ -477,13 +477,13 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase { memory::dims filter_dims, strides, padding_left, padding_right; // Get src/filter/stride/padding information this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right, isPool2D); + &padding_left, &padding_right, is_pool2d); // Get the input memory descriptor memory::dims src_dims = dnn_shape_input.IsMklTensor() ? dnn_shape_input.GetSizesAsMklDnnDims() - : isPool2D ? TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), + : is_pool2d ? TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), this->data_format_tf_) : TFShapeToMklDnnDimsInNCDHW(input_tensor.shape(), this->data_format_tf_); @@ -564,18 +564,18 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { orig_input_shape.AddDim(shape_vec(i)); } - bool isPool2D = (this->ksize_.size() == 4); + bool is_pool2d = (this->ksize_.size() == 4); this->InitMklPoolParameters(context, &pool_params, orig_input_mkl_shape, orig_input_shape); memory::dims filter_dims, strides, padding_left, padding_right; this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right, isPool2D); + &padding_left, &padding_right, is_pool2d); memory::dims orig_input_dims_mkl_order = orig_input_mkl_shape.IsMklTensor() ? orig_input_mkl_shape.GetSizesAsMklDnnDims() - : isPool2D ? TFShapeToMklDnnDimsInNCHW(orig_input_shape, + : is_pool2d ? TFShapeToMklDnnDimsInNCHW(orig_input_shape, this->data_format_tf_) : TFShapeToMklDnnDimsInNCDHW(orig_input_shape, this->data_format_tf_); @@ -583,7 +583,7 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { memory::dims diff_dst_dims = grad_mkl_shape.IsMklTensor() ? grad_mkl_shape.GetSizesAsMklDnnDims() - : isPool2D ? TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), + : is_pool2d ? 
TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), this->data_format_tf_) : TFShapeToMklDnnDimsInNCDHW(grad_tensor.shape(), this->data_format_tf_); diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc index aa7c0d9b7f..256d48f4d5 100644 --- a/tensorflow/core/kernels/mkl_maxpooling_op.cc +++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc @@ -525,7 +525,7 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase { // initialize variables for the pooling op MklPoolParameters pool_params; // check whether pooling is 2D or 3D - bool isPool2D = (this->ksize_.size() == 4); + bool is_pool2d = (this->ksize_.size() == 4); // Get the input tensor and initialize the pooling parameters TensorShape input_tensor_shape = input_tensor.shape(); this->InitMklPoolParameters(context, &pool_params, dnn_shape_input, @@ -549,7 +549,7 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase { memory::desc input_md = dnn_shape_input.IsMklTensor() ? dnn_shape_input.GetMklLayout() - : isPool2D ? memory::desc( + : is_pool2d ? memory::desc( TFShapeToMklDnnDimsInNCHW(input_tensor_shape, this->data_format_tf_), MklDnnType(), this->data_format_mkldnn_) @@ -562,13 +562,13 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase { memory::dims src_dims = dnn_shape_input.IsMklTensor() ? dnn_shape_input.GetSizesAsMklDnnDims() - : isPool2D ? TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), + : is_pool2d ? 
TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), this->data_format_tf_) : TFShapeToMklDnnDimsInNCDHW(input_tensor.shape(), this->data_format_tf_); memory::dims filter_dims, strides, padding_left, padding_right; this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right, isPool2D); + &padding_left, &padding_right, is_pool2d); // Get a pooling op from the cached pool MklPoolingFwdPrimitive* pooling_fwd = nullptr; @@ -672,18 +672,18 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { MklPoolParameters pool_params; TensorShape orig_input_shape = orig_input_tensor.shape(); - bool isPool2D = (this->ksize_.size() == 4); + bool is_pool2d = (this->ksize_.size() == 4); this->InitMklPoolParameters(context, &pool_params, orig_input_mkl_shape, orig_input_shape); memory::dims filter_dims, strides, padding_left, padding_right; this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right, isPool2D); + &padding_left, &padding_right, is_pool2d); memory::dims orig_input_dims_mkl_order = orig_input_mkl_shape.IsMklTensor() ? orig_input_mkl_shape.GetSizesAsMklDnnDims() - : isPool2D ? TFShapeToMklDnnDimsInNCHW(orig_input_shape, + : is_pool2d ? TFShapeToMklDnnDimsInNCHW(orig_input_shape, this->data_format_tf_) : TFShapeToMklDnnDimsInNCDHW(orig_input_shape, this->data_format_tf_); @@ -691,7 +691,7 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { memory::dims diff_dst_dims = grad_mkl_shape.IsMklTensor() ? grad_mkl_shape.GetSizesAsMklDnnDims() - : isPool2D ? TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), + : is_pool2d ? 
TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), this->data_format_tf_) : TFShapeToMklDnnDimsInNCDHW(grad_tensor.shape(), this->data_format_tf_); diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc index 5d02ceea12..ec6d241e17 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc @@ -284,8 +284,8 @@ void MklPoolParameters::Init(OpKernelContext* context, // Get the data format this->data_format = data_format; - bool isPool2D = (ksize.size() == 4); - if (isPool2D) { + bool is_pool2d = (ksize.size() == 4); + if (is_pool2d) { // Pool2D // Get the output sizes window_rows = GetTensorDim(ksize, data_format, 'H'); @@ -329,7 +329,7 @@ void MklPoolParameters::Init(OpKernelContext* context, } if (depth_window == 1) { // we are pooling in the D (Pool3D only), H and W - if (!isPool2D) { + if (!is_pool2d) { OP_REQUIRES_OK( context, GetWindowedOutputSizeVerbose(tensor_in_planes, window_planes, planes_stride, padding, @@ -348,7 +348,7 @@ void MklPoolParameters::Init(OpKernelContext* context, // Fail if the depth, height or width are greater than MAX_INT // We check depth only for 3D pooling case - if (!isPool2D) { + if (!is_pool2d) { OP_REQUIRES(context, FastBoundsCheck(out_planes, std::numeric_limits::max()), errors::InvalidArgument("output depth/planes is too large")); diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index ea7458062c..49f799d7ba 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -458,9 +458,9 @@ class MklPoolingOpBase : public OpKernel { OP_REQUIRES(context, this->ksize_[0] == 1 && this->stride_[0] == 1, errors::Unimplemented("Pooling is not yet supported on the " "batch dimension.")); - bool isPool2D = (this->ksize_.size() == 4); + bool is_pool2d = (this->ksize_.size() == 4); this->data_format_mkldnn_ = - 
isPool2D ? TFDataFormatToMklDnnDataFormat(this->data_format_tf_) + is_pool2d ? TFDataFormatToMklDnnDataFormat(this->data_format_tf_) : TFDataFormatToMklDnn3DDataFormat(this->data_format_tf_); // We may not get this attribute for this node if it does not go through @@ -510,8 +510,8 @@ class MklPoolingOpBase : public OpKernel { void PoolParamsToDims(const MklPoolParameters* pool_params, memory::dims* filter_dims, memory::dims* strides, memory::dims* padding_left, memory::dims* padding_right, - bool isPool2D) { - if (isPool2D) { + bool is_pool2d) { + if (is_pool2d) { // Pool2D *filter_dims = memory::dims({pool_params->window_rows, pool_params->window_cols}); -- GitLab From 8c3457521f719736a7ac109bf7debbedd7fe4584 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 19 Aug 2018 20:48:25 +0000 Subject: [PATCH 045/380] Fix compilation failure with RDMA+GDR This fix tries to address the issue raised in 21696 where tensorflow failed to compile when both RDMA and GDR are on. The issue is that the memory allocator of GDR used the same name as RDMA. This fix fixes 21696. Signed-off-by: Yong Tang --- tensorflow/contrib/gdr/gdr_memory_manager.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc index 7e6a0f14f6..c6bb02389d 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.cc +++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc @@ -186,22 +186,22 @@ class GdrMemoryManager : public RemoteMemoryManager { // TODO(byronyi): remove this class and its registration when the default // cpu_allocator() returns visitable allocator, or cpu_allocator() is no // longer in use. 
-class BFCRdmaAllocator : public BFCAllocator { +class BFCGdrAllocator : public BFCAllocator { public: - BFCRdmaAllocator() + BFCGdrAllocator() : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36, true, "cpu_rdma_bfc") {} }; -class BFCRdmaAllocatorFactory : public AllocatorFactory { +class BFCGdrAllocatorFactory : public AllocatorFactory { public: - Allocator* CreateAllocator() override { return new BFCRdmaAllocator; } + Allocator* CreateAllocator() override { return new BFCGdrAllocator; } virtual SubAllocator* CreateSubAllocator(int numa_node) { return new BasicCPUAllocator(numa_node); } }; -REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory); +REGISTER_MEM_ALLOCATOR("BFCGdrAllocator", 101, BFCGdrAllocatorFactory); GdrMemoryManager::GdrMemoryManager(const string& host, const string& port) : host_(host), -- GitLab From e1ca6dca575d58a7ab3264c8907ff05d98a6ddeb Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 19 Aug 2018 20:54:59 +0000 Subject: [PATCH 046/380] Rename allocator name to cpu_gdr_bfc Signed-off-by: Yong Tang --- tensorflow/contrib/gdr/gdr_memory_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc index c6bb02389d..f464760f90 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.cc +++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc @@ -190,7 +190,7 @@ class BFCGdrAllocator : public BFCAllocator { public: BFCGdrAllocator() : BFCAllocator(new BasicCPUAllocator(port::kNUMANoAffinity), 1LL << 36, - true, "cpu_rdma_bfc") {} + true, "cpu_gdr_bfc") {} }; class BFCGdrAllocatorFactory : public AllocatorFactory { public: -- GitLab From 0eaf660d1614a76070883f2cc4de42e1d82cd788 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 4 Aug 2018 22:24:29 +0000 Subject: [PATCH 047/380] Add tf.contrib.data.LMDBDataset support This fix tries to address the issue raised in 21129 where there was no LMDBDataset support 
(only LMDBReader). This fix fixes 21129. Signed-off-by: Yong Tang --- tensorflow/contrib/data/kernels/BUILD | 13 ++ .../contrib/data/kernels/lmdb_dataset_op.cc | 211 ++++++++++++++++++ tensorflow/contrib/data/ops/dataset_ops.cc | 6 + 3 files changed, 230 insertions(+) create mode 100644 tensorflow/contrib/data/kernels/lmdb_dataset_op.cc diff --git a/tensorflow/contrib/data/kernels/BUILD b/tensorflow/contrib/data/kernels/BUILD index 2e249f5c14..3ccaf0fddb 100644 --- a/tensorflow/contrib/data/kernels/BUILD +++ b/tensorflow/contrib/data/kernels/BUILD @@ -51,6 +51,18 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "lmdb_dataset_op", + srcs = ["lmdb_dataset_op.cc"], + deps = [ + "//tensorflow/core:framework_headers_lib", + "//third_party/eigen3", + "@lmdb", + "@protobuf_archive//:protobuf_headers", + ], +) + + cc_library( name = "threadpool_dataset_op", srcs = ["threadpool_dataset_op.cc"], @@ -91,6 +103,7 @@ cc_library( ":csv_dataset_op", ":directed_interleave_dataset_op", ":ignore_errors_dataset_op", + ":lmdb_dataset_op", ":prefetching_kernels", ":threadpool_dataset_op", ":unique_dataset_op", diff --git a/tensorflow/contrib/data/kernels/lmdb_dataset_op.cc b/tensorflow/contrib/data/kernels/lmdb_dataset_op.cc new file mode 100644 index 0000000000..c6eb1c5cd6 --- /dev/null +++ b/tensorflow/contrib/data/kernels/lmdb_dataset_op.cc @@ -0,0 +1,211 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/lib/io/buffered_inputstream.h" +#include "tensorflow/core/platform/file_system.h" + +#include +#include "lmdb.h" + +namespace tensorflow { +namespace { + +class LMDBDatasetOp : public DatasetOpKernel { + public: + using DatasetOpKernel::DatasetOpKernel; + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + const Tensor* filenames_tensor; + OP_REQUIRES_OK(ctx, ctx->input("filenames", &filenames_tensor)); + OP_REQUIRES( + ctx, filenames_tensor->dims() <= 1, + errors::InvalidArgument("`filenames` must be a scalar or a vector.")); + + std::vector filenames; + filenames.reserve(filenames_tensor->NumElements()); + for (int i = 0; i < filenames_tensor->NumElements(); ++i) { + filenames.push_back(filenames_tensor->flat()(i)); + } + + *output = new Dataset(ctx, filenames); + } + + private: + class Dataset : public GraphDatasetBase { + public: + Dataset(OpKernelContext* ctx, const std::vector& filenames) + : GraphDatasetBase(ctx), filenames_(filenames) {} + + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override { + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::LMDB")})); + } + + const DataTypeVector& output_dtypes() const override { + static DataTypeVector* dtypes = + new DataTypeVector({DT_STRING, DT_STRING}); + return *dtypes; + } + + const std::vector& output_shapes() const override { + static std::vector* shapes = + new std::vector({{}, {}}); + return *shapes; + } + + string DebugString() const override { return "LMDBDatasetOp::Dataset"; } + + protected: + Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Node** output) const override { + Node* filenames = nullptr; + TF_RETURN_IF_ERROR(b->AddVector(filenames_, &filenames)); + TF_RETURN_IF_ERROR(b->AddDataset(this, {filenames}, output)); + return Status::OK(); + } + + private: + class 
Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + do { + if (mdb_cursor_) { + Tensor key_tensor(ctx->allocator({}), DT_STRING, {}); + key_tensor.scalar()() = string( + static_cast(mdb_key_.mv_data), mdb_key_.mv_size); + out_tensors->emplace_back(std::move(key_tensor)); + + Tensor value_tensor(ctx->allocator({}), DT_STRING, {}); + value_tensor.scalar()() = + string(static_cast(mdb_value_.mv_data), + mdb_value_.mv_size); + out_tensors->emplace_back(std::move(value_tensor)); + + int val; + val = mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_NEXT); + if (val != MDB_SUCCESS && val != MDB_NOTFOUND) { + return errors::InvalidArgument(mdb_strerror(val)); + } + if (val == MDB_NOTFOUND) { + ResetStreamsLocked(); + ++current_file_index_; + } + *end_of_sequence = false; + return Status::OK(); + } + if (current_file_index_ == dataset()->filenames_.size()) { + *end_of_sequence = true; + return Status::OK(); + } + + TF_RETURN_IF_ERROR(SetupStreamsLocked(ctx->env())); + } while (true); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + return errors::Unimplemented("SaveInternal is currently not supported"); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + return errors::Unimplemented( + "RestoreInternal is currently not supported"); + } + + private: + Status SetupStreamsLocked(Env* env) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + if (current_file_index_ >= dataset()->filenames_.size()) { + return errors::InvalidArgument( + "current_file_index_:", current_file_index_, + " >= filenames_.size():", dataset()->filenames_.size()); + } + const string& filename = dataset()->filenames_[current_file_index_]; + + int val = mdb_env_create(&mdb_env_); + if (val != MDB_SUCCESS) { + return 
errors::InvalidArgument(mdb_strerror(val)); + } + int flags = MDB_RDONLY | MDB_NOTLS | MDB_NOLOCK; + + struct stat source_stat; + if (stat(filename.c_str(), &source_stat) == 0 && + (source_stat.st_mode & S_IFREG)) { + flags |= MDB_NOSUBDIR; + } + val = mdb_env_open(mdb_env_, filename.c_str(), flags, 0664); + if (val != MDB_SUCCESS) { + return errors::InvalidArgument(mdb_strerror(val)); + } + val = mdb_txn_begin(mdb_env_, nullptr, MDB_RDONLY, &mdb_txn_); + if (val != MDB_SUCCESS) { + return errors::InvalidArgument(mdb_strerror(val)); + } + val = mdb_dbi_open(mdb_txn_, nullptr, 0, &mdb_dbi_); + if (val != MDB_SUCCESS) { + return errors::InvalidArgument(mdb_strerror(val)); + } + val = mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_); + if (val != MDB_SUCCESS) { + return errors::InvalidArgument(mdb_strerror(val)); + } + val = mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_FIRST); + if (val != MDB_SUCCESS && val != MDB_NOTFOUND) { + return errors::InvalidArgument(mdb_strerror(val)); + } + if (val == MDB_NOTFOUND) { + ResetStreamsLocked(); + } + return Status::OK(); + } + void ResetStreamsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + if (mdb_env_ != nullptr) { + if (mdb_cursor_) { + mdb_cursor_close(mdb_cursor_); + mdb_cursor_ = nullptr; + } + mdb_dbi_close(mdb_env_, mdb_dbi_); + mdb_txn_abort(mdb_txn_); + mdb_env_close(mdb_env_); + mdb_txn_ = nullptr; + mdb_dbi_ = 0; + mdb_env_ = nullptr; + } + } + mutex mu_; + size_t current_file_index_ GUARDED_BY(mu_) = 0; + MDB_env* mdb_env_ GUARDED_BY(mu_) = nullptr; + MDB_txn* mdb_txn_ GUARDED_BY(mu_) = nullptr; + MDB_dbi mdb_dbi_ GUARDED_BY(mu_) = 0; + MDB_cursor* mdb_cursor_ GUARDED_BY(mu_) = nullptr; + + MDB_val mdb_key_ GUARDED_BY(mu_); + MDB_val mdb_value_ GUARDED_BY(mu_); + }; + + const std::vector filenames_; + }; +}; +} +REGISTER_KERNEL_BUILDER(Name("LMDBDataset").Device(DEVICE_CPU), LMDBDatasetOp); + +} // namespace tensorflow diff --git a/tensorflow/contrib/data/ops/dataset_ops.cc 
b/tensorflow/contrib/data/ops/dataset_ops.cc index cc5e250ea1..f17de2878a 100644 --- a/tensorflow/contrib/data/ops/dataset_ops.cc +++ b/tensorflow/contrib/data/ops/dataset_ops.cc @@ -266,4 +266,10 @@ REGISTER_OP("AssertNextDataset") return shape_inference::ScalarShape(c); }); +REGISTER_OP("LMDBDataset") + .Input("filenames: string") + .Output("handle: variant") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape); + } // namespace tensorflow -- GitLab From 3da376758711410c374329b831a99c483c7d9299 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 4 Aug 2018 22:25:17 +0000 Subject: [PATCH 048/380] Expose LMDBDataset to python Signed-off-by: Yong Tang --- tensorflow/contrib/data/python/ops/readers.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 3882d4bfdb..2f1aca3819 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -974,3 +974,46 @@ class SqlDataset(dataset_ops.Dataset): @property def output_types(self): return self._output_types + + +class LMDBDataset(dataset_ops.Dataset): + """A LMDB Dataset that reads the lmdb file.""" + + def __init__(self, filenames): + """Create a `LMDBDataset`. + `LMDBDataset` allows a user to read data from a mdb file as + (key value) pairs sequentially. + For example: + ```python + dataset = tf.contrib.lmdb.LMDBDataset("/foo/bar.mdb") + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + # Prints the (key, value) pairs inside a lmdb file. + while True: + try: + print(sess.run(next_element)) + except tf.errors.OutOfRangeError: + break + ``` + Args: + filenames: A `tf.string` tensor containing one or more filenames. 
+ """ + super(LMDBDataset, self).__init__() + self._filenames = ops.convert_to_tensor( + filenames, dtype=dtypes.string, name="filenames") + + def _as_variant_tensor(self): + return contrib_gen_dataset_ops.lmdb_dataset( + self._filenames) + + @property + def output_classes(self): + return ops.Tensor, ops.Tensor + + @property + def output_shapes(self): + return (tensor_shape.TensorShape([]), tensor_shape.TensorShape([])) + + @property + def output_types(self): + return dtypes.string, dtypes.string -- GitLab From b99c3b0db167f2c1719a8d5d3ce3c8b3de867e47 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 4 Aug 2018 22:25:43 +0000 Subject: [PATCH 049/380] Add test cases for LMDBDataset Signed-off-by: Yong Tang --- .../contrib/data/python/kernel_tests/BUILD | 25 ++++++ .../kernel_tests/lmdb_dataset_op_test.py | 76 +++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 2b75aa2ca5..74d0a30eee 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -179,6 +179,31 @@ py_test( ], ) +py_test( + name = "lmdb_dataset_op_test", + size = "medium", + srcs = ["lmdb_dataset_op_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_windows", + "no_pip", + ], + data = ["//tensorflow/core:lmdb_testdata"], + deps = [ + "//tensorflow/contrib/data/python/ops:readers", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:platform", + "//tensorflow/python:platform_test", + "//tensorflow/python:session", + "//third_party/py/numpy", + ], +) + py_test( name = "map_dataset_op_test", size = "medium", diff --git 
a/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py new file mode 100644 index 0000000000..5d7d4da113 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/lmdb_dataset_op_test.py @@ -0,0 +1,76 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for LMDBDatasetOp.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import shutil + +from tensorflow.contrib.data.python.ops import readers +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.platform import resource_loader +from tensorflow.python.platform import test +from tensorflow.python.util import compat + +class LMDBDatasetTest(test.TestCase): + + def setUp(self): + super(LMDBDatasetTest, self).setUp() + path = os.path.join( + resource_loader.get_root_dir_with_all_resources(), + "tensorflow", + "core", + "lib", + "lmdb", + "testdata", + "data.mdb") + + print(path) + # Copy database out because we need the path to be writable to use locks. 
+ self.db_path = os.path.join(self.get_temp_dir(), "data.mdb") + shutil.copy(path, self.db_path) + + def testReadFromFile(self): + filename = self.db_path + + filenames = constant_op.constant([filename], dtypes.string) + num_repeats = 2 + + dataset = readers.LMDBDataset( + filenames).repeat(num_repeats) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for _ in range(num_repeats): # Dataset is repeated. + for i in range(10): # 10 records. + k = compat.as_bytes(str(i)) + v = compat.as_bytes(str(chr(ord("a") + i))) + self.assertEqual((k, v), sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + +if __name__ == "__main__": + test.main() -- GitLab From 5c86342bfe080681e691ab1d4da7351c49fc5f19 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 4 Aug 2018 22:25:57 +0000 Subject: [PATCH 050/380] Expose data ops to tf.contrib.data Signed-off-by: Yong Tang --- tensorflow/contrib/data/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 5821d51bca..2f0ac51763 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -25,6 +25,7 @@ See [Importing Data](https://tensorflow.org/guide/datasets) for an overview. 
@@Counter @@CheckpointInputPipelineHook @@CsvDataset +@@LMDBDataset @@RandomDataset @@Reducer @@SqlDataset @@ -93,6 +94,7 @@ from tensorflow.contrib.data.python.ops.prefetching_ops import copy_to_device from tensorflow.contrib.data.python.ops.prefetching_ops import prefetch_to_device from tensorflow.contrib.data.python.ops.random_ops import RandomDataset from tensorflow.contrib.data.python.ops.readers import CsvDataset +from tensorflow.contrib.data.python.ops.readers import LMDBDataset from tensorflow.contrib.data.python.ops.readers import make_batched_features_dataset from tensorflow.contrib.data.python.ops.readers import make_csv_dataset from tensorflow.contrib.data.python.ops.readers import read_batch_features -- GitLab From 454d3e11216bcf44583665d714227cd20f88739b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 5 Aug 2018 16:07:17 +0000 Subject: [PATCH 051/380] Bazel BUILD file format fix with buildifier to address Sanity check failure Signed-off-by: Yong Tang --- tensorflow/contrib/data/kernels/BUILD | 1 - tensorflow/contrib/data/python/kernel_tests/BUILD | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/data/kernels/BUILD b/tensorflow/contrib/data/kernels/BUILD index 3ccaf0fddb..5058919530 100644 --- a/tensorflow/contrib/data/kernels/BUILD +++ b/tensorflow/contrib/data/kernels/BUILD @@ -62,7 +62,6 @@ cc_library( ], ) - cc_library( name = "threadpool_dataset_op", srcs = ["threadpool_dataset_op.cc"], diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 74d0a30eee..dc396221c4 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -183,12 +183,12 @@ py_test( name = "lmdb_dataset_op_test", size = "medium", srcs = ["lmdb_dataset_op_test.py"], + data = ["//tensorflow/core:lmdb_testdata"], srcs_version = "PY2AND3", tags = [ - "no_windows", "no_pip", + "no_windows", ], - data = 
["//tensorflow/core:lmdb_testdata"], deps = [ "//tensorflow/contrib/data/python/ops:readers", "//tensorflow/python:client_testlib", -- GitLab From 1c9c3ff77cb86a6fba33e03784929cd4ceaf5dcb Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 21 Aug 2018 01:16:20 +0000 Subject: [PATCH 052/380] Update signature of the LMDBDataset to match recent changes Signed-off-by: Yong Tang --- tensorflow/contrib/data/kernels/lmdb_dataset_op.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/data/kernels/lmdb_dataset_op.cc b/tensorflow/contrib/data/kernels/lmdb_dataset_op.cc index c6eb1c5cd6..acc1e5fe82 100644 --- a/tensorflow/contrib/data/kernels/lmdb_dataset_op.cc +++ b/tensorflow/contrib/data/kernels/lmdb_dataset_op.cc @@ -43,10 +43,10 @@ class LMDBDatasetOp : public DatasetOpKernel { } private: - class Dataset : public GraphDatasetBase { + class Dataset : public DatasetBase { public: Dataset(OpKernelContext* ctx, const std::vector& filenames) - : GraphDatasetBase(ctx), filenames_(filenames) {} + : DatasetBase(DatasetContext(ctx)), filenames_(filenames) {} std::unique_ptr MakeIteratorInternal( const string& prefix) const override { @@ -69,7 +69,8 @@ class LMDBDatasetOp : public DatasetOpKernel { string DebugString() const override { return "LMDBDatasetOp::Dataset"; } protected: - Status AsGraphDefInternal(DatasetGraphDefBuilder* b, + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, Node** output) const override { Node* filenames = nullptr; TF_RETURN_IF_ERROR(b->AddVector(filenames_, &filenames)); -- GitLab From 32b89c36e17abcf541f06a99aaba5c336dad1258 Mon Sep 17 00:00:00 2001 From: Aurelien Geron Date: Tue, 21 Aug 2018 10:23:50 +0200 Subject: [PATCH 053/380] Add more doc for the dict returned by an estimator's evaluate() method --- tensorflow/docs_src/guide/premade_estimators.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/docs_src/guide/premade_estimators.md 
b/tensorflow/docs_src/guide/premade_estimators.md index a1703058c3..9b64d51b98 100644 --- a/tensorflow/docs_src/guide/premade_estimators.md +++ b/tensorflow/docs_src/guide/premade_estimators.md @@ -366,6 +366,8 @@ Running this code yields the following output (or something similar): Test set accuracy: 0.967 ``` +The `eval_result` dictionary also contains the `average_loss` (mean loss per sample), the `loss` (mean loss per mini-batch) and the value of the estimator's `global_step` (the number of training iterations it underwent). + ### Making predictions (inferring) from the trained model We now have a trained model that produces good evaluation results. -- GitLab From 0c5683c50b2f4afc124ac7c4b61e316b4130b97d Mon Sep 17 00:00:00 2001 From: Aurelien Geron Date: Tue, 21 Aug 2018 10:31:29 +0200 Subject: [PATCH 054/380] Add more doc for the dict returned by an estimator's evaluate() method in the api doc --- tensorflow/python/estimator/estimator.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index f7ee42c7f6..bcbd7b7933 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -431,7 +431,11 @@ class Estimator(object): Returns: A dict containing the evaluation metrics specified in `model_fn` keyed by name, as well as an entry `global_step` which contains the value of the - global step for which this evaluation was performed. + global step for which this evaluation was performed. For canned + estimators, the dict contains the `loss` (mean loss per mini-batch) and + the `average_loss` (mean loss per sample). Canned classifiers also return + the `accuracy`. Canned regressors also return the `label/mean` and the + `prediction/mean`. Raises: ValueError: If `steps <= 0`. 
-- GitLab From eea511b182120c3e682634b9175770e3e759695d Mon Sep 17 00:00:00 2001 From: Kate Hodesdon Date: Tue, 21 Aug 2018 11:25:36 -0400 Subject: [PATCH 055/380] updated broken link to Estimators page --- tensorflow/docs_src/guide/saved_model.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/guide/saved_model.md b/tensorflow/docs_src/guide/saved_model.md index 6c967fd882..26853d6604 100644 --- a/tensorflow/docs_src/guide/saved_model.md +++ b/tensorflow/docs_src/guide/saved_model.md @@ -2,7 +2,7 @@ The `tf.train.Saver` class provides methods to save and restore models. The `tf.saved_model.simple_save` function is an easy way to build a -`tf.saved_model` suitable for serving. [Estimators](./estimators) +`tf.saved_model` suitable for serving. [Estimators](./guide/estimators.md) automatically save and restore variables in the `model_dir`. ## Save and restore variables -- GitLab From 0a89cfc9702f6e2752a26cddbce8e0f85afa69c5 Mon Sep 17 00:00:00 2001 From: Kate Hodesdon Date: Tue, 21 Aug 2018 11:27:38 -0400 Subject: [PATCH 056/380] updated broken link to Estimators page --- tensorflow/docs_src/guide/saved_model.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/guide/saved_model.md b/tensorflow/docs_src/guide/saved_model.md index 26853d6604..33ab891861 100644 --- a/tensorflow/docs_src/guide/saved_model.md +++ b/tensorflow/docs_src/guide/saved_model.md @@ -2,7 +2,7 @@ The `tf.train.Saver` class provides methods to save and restore models. The `tf.saved_model.simple_save` function is an easy way to build a -`tf.saved_model` suitable for serving. [Estimators](./guide/estimators.md) +`tf.saved_model` suitable for serving. [Estimators](../guide/estimators.md) automatically save and restore variables in the `model_dir`. 
## Save and restore variables -- GitLab From db2cb377e3e2a0d12e7090cddf023f58dae38946 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 21 Aug 2018 13:08:25 -0700 Subject: [PATCH 057/380] Rename the artifact IDs for TensorFlow ecosystem jars --- tensorflow/java/maven/pom.xml | 4 ++-- tensorflow/java/maven/run_inside_container.sh | 6 +++--- .../{spark-connector => spark-tensorflow-connector}/pom.xml | 6 +++--- tensorflow/java/maven/{hadoop => tensorflow-hadoop}/pom.xml | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) rename tensorflow/java/maven/{spark-connector => spark-tensorflow-connector}/pom.xml (99%) rename tensorflow/java/maven/{hadoop => tensorflow-hadoop}/pom.xml (98%) diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 035077e1e0..e1bf2c7dba 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -32,8 +32,8 @@ libtensorflow_jni_gpu tensorflow proto - hadoop - spark-connector + tensorflow-hadoop + spark-tensorflow-connector Please import third_party/tensorflow_kfac.== -# ==== - -**K-FAC in TensorFlow** is an implementation of [K-FAC][kfac-paper], an -approximate second-order optimization method, in TensorFlow. When applied to -feedforward and convolutional neural networks, K-FAC can converge `>3.5x` -faster in `>14x` fewer iterations than SGD with Momentum. - -[kfac-paper]: https://arxiv.org/abs/1503.05671 - -## What is K-FAC? - -K-FAC, short for "Kronecker-factored Approximate Curvature", is an approximation -to the [Natural Gradient][natural_gradient] algorithm designed specifically for -neural networks. It maintains a block-diagonal approximation to the [Fisher -Information matrix][fisher_information], whose inverse preconditions the -gradient. - -K-FAC can be used in place of SGD, Adam, and other `Optimizer` implementations. -Experimentally, K-FAC converges `>3.5x` faster than well-tuned SGD. - -Unlike most optimizers, K-FAC exploits structure in the model itself (e.g. 
"What -are the weights for layer i?"). As such, you must add some additional code while -constructing your model to use K-FAC. - -[natural_gradient]: http://www.mitpressjournals.org/doi/abs/10.1162/089976698300017746 -[fisher_information]: https://en.wikipedia.org/wiki/Fisher_information#Matrix_form - -## Why should I use K-FAC? - -K-FAC can take advantage of the curvature of the optimization problem, resulting -in **faster training**. For an 8-layer Autoencoder, K-FAC converges to the same -loss as SGD with Momentum in 3.8x fewer seconds and 14.7x fewer updates. See how -training loss changes as a function of number of epochs, steps, and seconds: - -![autoencoder](g3doc/autoencoder.png) - -## Is K-FAC for me? - -If you have a feedforward or convolutional model for classification that is -converging too slowly, K-FAC is for you. K-FAC can be used in your model if: - -* Your model defines a posterior distribution. -* Your model uses only fully-connected or convolutional layers (residual - connections OK). -* You are training on CPU or GPU. -* You can modify model code to register layers with K-FAC. - -## How do I use K-FAC? - -Using K-FAC requires three steps: - -1. Registering layer inputs, weights, and pre-activations with a - `LayerCollection`. -1. Minimizing the loss with a `KfacOptimizer`. -1. Keeping K-FAC's preconditioner updated. - -```python -# Build model. -w = tf.get_variable("w", ...) -b = tf.get_variable("b", ...) -logits = tf.matmul(x, w) + b -loss = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)) - -# Register layers. -layer_collection = LayerCollection() -layer_collection.register_fully_connected((w, b), x, logits) -layer_collection.register_categorical_predictive_distribution(logits) - -# Construct training ops. -optimizer = KfacOptimizer(..., layer_collection=layer_collection) -train_op = optimizer.minimize(loss) - -# Minimize loss. -with tf.Session() as sess: - ... 
- sess.run([train_op, optimizer.cov_update_op, optimizer.inv_update_op]) -``` - -See [`examples/`](https://www.tensorflow.org/code/tensorflow/contrib/kfac/examples/) for runnable, end-to-end illustrations. - -## Authors - -- Alok Aggarwal -- Daniel Duckworth -- James Martens -- Matthew Johnson -- Olga Wichrowska -- Roger Grosse +## KFAC moved to third_party/tensorflow_kfac. diff --git a/tensorflow/contrib/kfac/__init__.py b/tensorflow/contrib/kfac/__init__.py deleted file mode 100644 index 1ea354e6cd..0000000000 --- a/tensorflow/contrib/kfac/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Kronecker-factored Approximate Curvature Optimizer.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long -from tensorflow.contrib.kfac.python.ops import curvature_matrix_vector_products_lib as curvature_matrix_vector_products -from tensorflow.contrib.kfac.python.ops import estimator_lib as estimator -from tensorflow.contrib.kfac.python.ops import fisher_blocks_lib as fisher_blocks -from tensorflow.contrib.kfac.python.ops import fisher_factors_lib as fisher_factors -from tensorflow.contrib.kfac.python.ops import layer_collection_lib as layer_collection -from tensorflow.contrib.kfac.python.ops import loss_functions_lib as loss_functions -from tensorflow.contrib.kfac.python.ops import op_queue_lib as op_queue -from tensorflow.contrib.kfac.python.ops import optimizer_lib as optimizer -from tensorflow.contrib.kfac.python.ops import utils_lib as utils -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long - -_allowed_symbols = [ - "curvature_matrix_vector_products", - "estimator", - "fisher_blocks", - "fisher_factors", - "layer_collection", - "loss_functions", - "op_queue", - "optimizer", - "utils", -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/examples/BUILD b/tensorflow/contrib/kfac/examples/BUILD deleted file mode 100644 index 8186fa1c62..0000000000 --- a/tensorflow/contrib/kfac/examples/BUILD +++ /dev/null @@ -1,80 +0,0 @@ -package(default_visibility = [ - "//learning/brain/contrib/kfac/examples:__subpackages__", - "//tensorflow/contrib/kfac/examples:__subpackages__", -]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -py_binary( - name = "mlp_mnist_main", - srcs = ["mlp_mnist_main.py"], - srcs_version = "PY2AND3", - deps = [ - 
":mlp", - "//tensorflow:tensorflow_py", - ], -) - -py_library( - name = "mlp", - srcs = ["mlp.py"], - srcs_version = "PY2AND3", - deps = [ - ":mnist", - "//tensorflow:tensorflow_py", - ], -) - -py_binary( - name = "convnet_mnist_single_main", - srcs = ["convnet_mnist_single_main.py"], - srcs_version = "PY2AND3", - deps = [ - ":convnet", - "//tensorflow:tensorflow_py", - ], -) - -py_binary( - name = "convnet_mnist_multi_tower_main", - srcs = ["convnet_mnist_multi_tower_main.py"], - srcs_version = "PY2AND3", - deps = [ - ":convnet", - "//tensorflow:tensorflow_py", - ], -) - -py_binary( - name = "convnet_mnist_distributed_main", - srcs = ["convnet_mnist_distributed_main.py"], - srcs_version = "PY2AND3", - deps = [ - ":convnet", - "//tensorflow:tensorflow_py", - ], -) - -py_library( - name = "convnet", - srcs = ["convnet.py"], - srcs_version = "PY2AND3", - deps = [ - ":mlp", - ":mnist", - "//tensorflow:tensorflow_py", - "//third_party/py/numpy", - ], -) - -py_library( - name = "mnist", - srcs = ["mnist.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow:tensorflow_py", - "//third_party/py/numpy", - ], -) diff --git a/tensorflow/contrib/kfac/examples/convnet.py b/tensorflow/contrib/kfac/examples/convnet.py deleted file mode 100644 index 44e01e1aeb..0000000000 --- a/tensorflow/contrib/kfac/examples/convnet.py +++ /dev/null @@ -1,667 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -r"""Train a ConvNet on MNIST using K-FAC. - -This library fits a 5-layer ConvNet on MNIST using K-FAC. The model has the -following structure, - -- Conv Layer: 5x5 kernel, 16 output channels. -- Max Pool: 3x3 kernel, stride 2. -- Conv Layer: 5x5 kernel, 16 output channels. -- Max Pool: 3x3 kernel, stride 2. -- Linear: 10 output dims. - -After 3k~6k steps, this should reach perfect accuracy on the training set. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import numpy as np -import tensorflow as tf - -from tensorflow.contrib.kfac.examples import mlp -from tensorflow.contrib.kfac.examples import mnist -from tensorflow.contrib.kfac.python.ops import optimizer as opt - - -lc = tf.contrib.kfac.layer_collection -oq = tf.contrib.kfac.op_queue -opt = tf.contrib.kfac.optimizer - -__all__ = [ - "conv_layer", - "max_pool_layer", - "linear_layer", - "build_model", - "minimize_loss_single_machine", - "distributed_grads_only_and_ops_chief_worker", - "distributed_grads_and_ops_dedicated_workers", - "train_mnist_single_machine", - "train_mnist_distributed_sync_replicas", - "train_mnist_multitower" -] - - -# Inverse update ops will be run every _INVERT_EVRY iterations. -_INVERT_EVERY = 10 - - -def conv_layer(layer_id, inputs, kernel_size, out_channels): - """Builds a convolutional layer with ReLU non-linearity. - - Args: - layer_id: int. Integer ID for this layer's variables. - inputs: Tensor of shape [num_examples, width, height, in_channels]. Each row - corresponds to a single example. - kernel_size: int. Width and height of the convolution kernel. The kernel is - assumed to be square. - out_channels: int. Number of output features per pixel. - - Returns: - preactivations: Tensor of shape [num_examples, width, height, out_channels]. - Values of the layer immediately before the activation function. 
- activations: Tensor of shape [num_examples, width, height, out_channels]. - Values of the layer immediately after the activation function. - params: Tuple of (kernel, bias), parameters for this layer. - """ - # TODO(b/67004004): Delete this function and rely on tf.layers exclusively. - layer = tf.layers.Conv2D( - out_channels, - kernel_size=[kernel_size, kernel_size], - kernel_initializer=tf.random_normal_initializer(stddev=0.01), - padding="SAME", - name="conv_%d" % layer_id) - preactivations = layer(inputs) - activations = tf.nn.relu(preactivations) - - # layer.weights is a list. This converts it a (hashable) tuple. - return preactivations, activations, (layer.kernel, layer.bias) - - -def max_pool_layer(layer_id, inputs, kernel_size, stride): - """Build a max-pooling layer. - - Args: - layer_id: int. Integer ID for this layer's variables. - inputs: Tensor of shape [num_examples, width, height, in_channels]. Each row - corresponds to a single example. - kernel_size: int. Width and height to pool over per input channel. The - kernel is assumed to be square. - stride: int. Step size between pooling operations. - - Returns: - Tensor of shape [num_examples, width/stride, height/stride, out_channels]. - Result of applying max pooling to 'inputs'. - """ - # TODO(b/67004004): Delete this function and rely on tf.layers exclusively. - with tf.variable_scope("pool_%d" % layer_id): - return tf.nn.max_pool( - inputs, [1, kernel_size, kernel_size, 1], [1, stride, stride, 1], - padding="SAME", - name="pool") - - -def linear_layer(layer_id, inputs, output_size): - """Builds the final linear layer for an MNIST classification problem. - - Args: - layer_id: int. Integer ID for this layer's variables. - inputs: Tensor of shape [num_examples, width, height, in_channels]. Each row - corresponds to a single example. - output_size: int. Number of output dims per example. - - Returns: - activations: Tensor of shape [num_examples, output_size]. 
Values of the - layer immediately after the activation function. - params: Tuple of (weights, bias), parameters for this layer. - """ - # TODO(b/67004004): Delete this function and rely on tf.layers exclusively. - pre, _, params = mlp.fc_layer(layer_id, inputs, output_size) - return pre, params - - -def build_model(examples, labels, num_labels, layer_collection): - """Builds a ConvNet classification model. - - Args: - examples: Tensor of shape [num_examples, num_features]. Represents inputs of - model. - labels: Tensor of shape [num_examples]. Contains integer IDs to be predicted - by softmax for each example. - num_labels: int. Number of distinct values 'labels' can take on. - layer_collection: LayerCollection instance. Layers will be registered here. - - Returns: - loss: 0-D Tensor representing loss to be minimized. - accuracy: 0-D Tensor representing model's accuracy. - """ - # Build a ConvNet. For each layer with parameters, we'll keep track of the - # preactivations, activations, weights, and bias. - tf.logging.info("Building model.") - pre0, act0, params0 = conv_layer( - layer_id=0, inputs=examples, kernel_size=5, out_channels=16) - act1 = max_pool_layer(layer_id=1, inputs=act0, kernel_size=3, stride=2) - pre2, act2, params2 = conv_layer( - layer_id=2, inputs=act1, kernel_size=5, out_channels=16) - act3 = max_pool_layer(layer_id=3, inputs=act2, kernel_size=3, stride=2) - flat_act3 = tf.reshape(act3, shape=[-1, int(np.prod(act3.shape[1:4]))]) - logits, params4 = linear_layer( - layer_id=4, inputs=flat_act3, output_size=num_labels) - loss = tf.reduce_mean( - tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=labels, logits=logits)) - accuracy = tf.reduce_mean( - tf.cast(tf.equal(labels, tf.argmax(logits, axis=1)), dtype=tf.float32)) - - with tf.device("/cpu:0"): - tf.summary.scalar("loss", loss) - tf.summary.scalar("accuracy", accuracy) - - # Register parameters. 
K-FAC needs to know about the inputs, outputs, and - # parameters of each conv/fully connected layer and the logits powering the - # posterior probability over classes. - tf.logging.info("Building LayerCollection.") - layer_collection.register_conv2d(params0, (1, 1, 1, 1), "SAME", examples, - pre0) - layer_collection.register_conv2d(params2, (1, 1, 1, 1), "SAME", act1, pre2) - layer_collection.register_fully_connected(params4, flat_act3, logits) - layer_collection.register_categorical_predictive_distribution( - logits, name="logits") - - return loss, accuracy - - -def minimize_loss_single_machine(loss, - accuracy, - layer_collection, - device="/gpu:0", - session_config=None): - """Minimize loss with K-FAC on a single machine. - - A single Session is responsible for running all of K-FAC's ops. The covariance - and inverse update ops are placed on `device`. All model variables are on CPU. - - Args: - loss: 0-D Tensor. Loss to be minimized. - accuracy: 0-D Tensor. Accuracy of classifier on current minibatch. - layer_collection: LayerCollection instance describing model architecture. - Used by K-FAC to construct preconditioner. - device: string, Either '/cpu:0' or '/gpu:0'. The covariance and inverse - update ops are run on this device. - session_config: None or tf.ConfigProto. Configuration for tf.Session(). - - Returns: - final value for 'accuracy'. - """ - # Train with K-FAC. 
- g_step = tf.train.get_or_create_global_step() - optimizer = opt.KfacOptimizer( - learning_rate=0.0001, - cov_ema_decay=0.95, - damping=0.001, - layer_collection=layer_collection, - placement_strategy="round_robin", - cov_devices=[device], - inv_devices=[device], - momentum=0.9) - (cov_update_thunks, - inv_update_thunks) = optimizer.make_vars_and_create_op_thunks() - - def make_update_op(update_thunks): - update_ops = [thunk() for thunk in update_thunks] - return tf.group(*update_ops) - - cov_update_op = make_update_op(cov_update_thunks) - with tf.control_dependencies([cov_update_op]): - inverse_op = tf.cond( - tf.equal(tf.mod(g_step, _INVERT_EVERY), 0), - lambda: make_update_op(inv_update_thunks), tf.no_op) - with tf.control_dependencies([inverse_op]): - with tf.device(device): - train_op = optimizer.minimize(loss, global_step=g_step) - - tf.logging.info("Starting training.") - with tf.train.MonitoredTrainingSession(config=session_config) as sess: - while not sess.should_stop(): - global_step_, loss_, accuracy_, _ = sess.run( - [g_step, loss, accuracy, train_op]) - - if global_step_ % _INVERT_EVERY == 0: - tf.logging.info("global_step: %d | loss: %f | accuracy: %s", - global_step_, loss_, accuracy_) - - return accuracy_ - - -def _is_gradient_task(task_id, num_tasks): - """Returns True if this task should update the weights.""" - if num_tasks < 3: - return True - return 0 <= task_id < 0.6 * num_tasks - - -def _is_cov_update_task(task_id, num_tasks): - """Returns True if this task should update K-FAC's covariance matrices.""" - if num_tasks < 3: - return False - return 0.6 * num_tasks <= task_id < num_tasks - 1 - - -def _is_inv_update_task(task_id, num_tasks): - """Returns True if this task should update K-FAC's preconditioner.""" - if num_tasks < 3: - return False - return task_id == num_tasks - 1 - - -def _num_gradient_tasks(num_tasks): - """Number of tasks that will update weights.""" - if num_tasks < 3: - return num_tasks - return int(np.ceil(0.6 * num_tasks)) 
- - -def _make_distributed_train_op( - task_id, - num_worker_tasks, - num_ps_tasks, - layer_collection -): - """Creates optimizer and distributed training op. - - Constructs KFAC optimizer and wraps it in `sync_replicas` optimizer. Makes - the train op. - - Args: - task_id: int. Integer in [0, num_worker_tasks). ID for this worker. - num_worker_tasks: int. Number of workers in this distributed training setup. - num_ps_tasks: int. Number of parameter servers holding variables. If 0, - parameter servers are not used. - layer_collection: LayerCollection instance describing model architecture. - Used by K-FAC to construct preconditioner. - - Returns: - sync_optimizer: `tf.train.SyncReplicasOptimizer` instance which wraps KFAC - optimizer. - optimizer: Instance of `opt.KfacOptimizer`. - global_step: `tensor`, Global step. - """ - tf.logging.info("Task id : %d", task_id) - with tf.device(tf.train.replica_device_setter(num_ps_tasks)): - global_step = tf.train.get_or_create_global_step() - optimizer = opt.KfacOptimizer( - learning_rate=0.0001, - cov_ema_decay=0.95, - damping=0.001, - layer_collection=layer_collection, - momentum=0.9) - sync_optimizer = tf.train.SyncReplicasOptimizer( - opt=optimizer, - replicas_to_aggregate=_num_gradient_tasks(num_worker_tasks), - total_num_replicas=num_worker_tasks) - return sync_optimizer, optimizer, global_step - - -def distributed_grads_only_and_ops_chief_worker( - task_id, is_chief, num_worker_tasks, num_ps_tasks, master, checkpoint_dir, - loss, accuracy, layer_collection, invert_every=10): - """Minimize loss with a synchronous implementation of K-FAC. - - All workers perform gradient computation. Chief worker applies gradient after - averaging the gradients obtained from all the workers. All workers block - execution until the update is applied. Chief worker runs covariance and - inverse update ops. Covariance and inverse matrices are placed on parameter - servers in a round robin manner. 
For further details on synchronous - distributed optimization check `tf.train.SyncReplicasOptimizer`. - - Args: - task_id: int. Integer in [0, num_worker_tasks). ID for this worker. - is_chief: `boolean`, `True` if the worker is chief worker. - num_worker_tasks: int. Number of workers in this distributed training setup. - num_ps_tasks: int. Number of parameter servers holding variables. If 0, - parameter servers are not used. - master: string. IP and port of TensorFlow runtime process. Set to empty - string to run locally. - checkpoint_dir: string or None. Path to store checkpoints under. - loss: 0-D Tensor. Loss to be minimized. - accuracy: dict mapping strings to 0-D Tensors. Additional accuracy to - run with each step. - layer_collection: LayerCollection instance describing model architecture. - Used by K-FAC to construct preconditioner. - invert_every: `int`, Number of steps between update the inverse. - - Returns: - final value for 'accuracy'. - - Raises: - ValueError: if task_id >= num_worker_tasks. 
- """ - - sync_optimizer, optimizer, global_step = _make_distributed_train_op( - task_id, num_worker_tasks, num_ps_tasks, layer_collection) - (cov_update_thunks, - inv_update_thunks) = optimizer.make_vars_and_create_op_thunks() - - tf.logging.info("Starting training.") - hooks = [sync_optimizer.make_session_run_hook(is_chief)] - - def make_update_op(update_thunks): - update_ops = [thunk() for thunk in update_thunks] - return tf.group(*update_ops) - - if is_chief: - cov_update_op = make_update_op(cov_update_thunks) - with tf.control_dependencies([cov_update_op]): - inverse_op = tf.cond( - tf.equal(tf.mod(global_step, invert_every), 0), - lambda: make_update_op(inv_update_thunks), - tf.no_op) - with tf.control_dependencies([inverse_op]): - train_op = sync_optimizer.minimize(loss, global_step=global_step) - else: - train_op = sync_optimizer.minimize(loss, global_step=global_step) - - with tf.train.MonitoredTrainingSession( - master=master, - is_chief=is_chief, - checkpoint_dir=checkpoint_dir, - hooks=hooks, - stop_grace_period_secs=0) as sess: - while not sess.should_stop(): - global_step_, loss_, accuracy_, _ = sess.run( - [global_step, loss, accuracy, train_op]) - tf.logging.info("global_step: %d | loss: %f | accuracy: %s", global_step_, - loss_, accuracy_) - return accuracy_ - - -def distributed_grads_and_ops_dedicated_workers( - task_id, is_chief, num_worker_tasks, num_ps_tasks, master, checkpoint_dir, - loss, accuracy, layer_collection): - """Minimize loss with a synchronous implementation of K-FAC. - - Different workers are responsible for different parts of K-FAC's Ops. The - first 60% of tasks compute gradients; the next 20% accumulate covariance - statistics; the last 20% invert the matrices used to precondition gradients. - The chief worker applies the gradient . - - Args: - task_id: int. Integer in [0, num_worker_tasks). ID for this worker. - is_chief: `boolean`, `True` if the worker is chief worker. - num_worker_tasks: int. 
Number of workers in this distributed training setup. - num_ps_tasks: int. Number of parameter servers holding variables. If 0, - parameter servers are not used. - master: string. IP and port of TensorFlow runtime process. Set to empty - string to run locally. - checkpoint_dir: string or None. Path to store checkpoints under. - loss: 0-D Tensor. Loss to be minimized. - accuracy: dict mapping strings to 0-D Tensors. Additional accuracy to - run with each step. - layer_collection: LayerCollection instance describing model architecture. - Used by K-FAC to construct preconditioner. - - Returns: - final value for 'accuracy'. - - Raises: - ValueError: if task_id >= num_worker_tasks. - """ - sync_optimizer, optimizer, global_step = _make_distributed_train_op( - task_id, num_worker_tasks, num_ps_tasks, layer_collection) - _, cov_update_op, inv_update_ops, _, _, _ = optimizer.make_ops_and_vars() - train_op = sync_optimizer.minimize(loss, global_step=global_step) - inv_update_queue = oq.OpQueue(inv_update_ops) - - tf.logging.info("Starting training.") - is_chief = (task_id == 0) - hooks = [sync_optimizer.make_session_run_hook(is_chief)] - with tf.train.MonitoredTrainingSession( - master=master, - is_chief=is_chief, - checkpoint_dir=checkpoint_dir, - hooks=hooks, - stop_grace_period_secs=0) as sess: - while not sess.should_stop(): - # Choose which op this task is responsible for running. - if _is_gradient_task(task_id, num_worker_tasks): - learning_op = train_op - elif _is_cov_update_task(task_id, num_worker_tasks): - learning_op = cov_update_op - elif _is_inv_update_task(task_id, num_worker_tasks): - # TODO(duckworthd): Running this op before cov_update_op has been run a - # few times can result in "InvalidArgumentError: Cholesky decomposition - # was not successful." Delay running this op until cov_update_op has - # been run a few times. - learning_op = inv_update_queue.next_op(sess) - else: - raise ValueError("Which op should task %d do?" 
% task_id) - - global_step_, loss_, accuracy_, _ = sess.run( - [global_step, loss, accuracy, learning_op]) - tf.logging.info("global_step: %d | loss: %f | accuracy: %s", global_step_, - loss_, accuracy_) - - return accuracy_ - - -def train_mnist_single_machine(data_dir, - num_epochs, - use_fake_data=False, - device="/gpu:0"): - """Train a ConvNet on MNIST. - - Args: - data_dir: string. Directory to read MNIST examples from. - num_epochs: int. Number of passes to make over the training set. - use_fake_data: bool. If True, generate a synthetic dataset. - device: string, Either '/cpu:0' or '/gpu:0'. The covariance and inverse - update ops are run on this device. - - Returns: - accuracy of model on the final minibatch of training data. - """ - # Load a dataset. - tf.logging.info("Loading MNIST into memory.") - examples, labels = mnist.load_mnist( - data_dir, - num_epochs=num_epochs, - batch_size=128, - use_fake_data=use_fake_data, - flatten_images=False) - - # Build a ConvNet. - layer_collection = lc.LayerCollection() - loss, accuracy = build_model( - examples, labels, num_labels=10, layer_collection=layer_collection) - - # Fit model. - return minimize_loss_single_machine( - loss, accuracy, layer_collection, device=device) - - -def train_mnist_multitower(data_dir, num_epochs, num_towers, - use_fake_data=True, devices=None): - """Train a ConvNet on MNIST. - - Training data is split equally among the towers. Each tower computes loss on - its own batch of data and the loss is aggregated on the CPU. The model - variables are placed on first tower. The covariance and inverse update ops - and variables are placed on GPUs in a round robin manner. - - Args: - data_dir: string. Directory to read MNIST examples from. - num_epochs: int. Number of passes to make over the training set. - num_towers: int. Number of CPUs to split inference across. - use_fake_data: bool. If True, generate a synthetic dataset. - devices: string, Either list of CPU or GPU. 
The covariance and inverse - update ops are run on this device. - - Returns: - accuracy of model on the final minibatch of training data. - """ - if devices: - device_count = {"GPU": num_towers} - else: - device_count = {"CPU": num_towers} - - devices = devices or [ - "/cpu:{}".format(tower_id) for tower_id in range(num_towers) - ] - # Load a dataset. - tf.logging.info("Loading MNIST into memory.") - tower_batch_size = 128 - batch_size = tower_batch_size * num_towers - tf.logging.info( - ("Loading MNIST into memory. Using batch_size = %d = %d towers * %d " - "tower batch size.") % (batch_size, num_towers, tower_batch_size)) - examples, labels = mnist.load_mnist( - data_dir, - num_epochs=num_epochs, - batch_size=batch_size, - use_fake_data=use_fake_data, - flatten_images=False) - - # Split minibatch across towers. - examples = tf.split(examples, num_towers) - labels = tf.split(labels, num_towers) - - # Build an MLP. Each tower's layers will be added to the LayerCollection. - layer_collection = lc.LayerCollection() - tower_results = [] - for tower_id in range(num_towers): - with tf.device(devices[tower_id]): - with tf.name_scope("tower%d" % tower_id): - with tf.variable_scope(tf.get_variable_scope(), reuse=(tower_id > 0)): - tf.logging.info("Building tower %d." % tower_id) - tower_results.append( - build_model(examples[tower_id], labels[tower_id], 10, - layer_collection)) - losses, accuracies = zip(*tower_results) - - # Average across towers. - loss = tf.reduce_mean(losses) - accuracy = tf.reduce_mean(accuracies) - - # Fit model. 
- - session_config = tf.ConfigProto( - allow_soft_placement=False, - device_count=device_count, - ) - - g_step = tf.train.get_or_create_global_step() - optimizer = opt.KfacOptimizer( - learning_rate=0.0001, - cov_ema_decay=0.95, - damping=0.001, - layer_collection=layer_collection, - placement_strategy="round_robin", - cov_devices=devices, - inv_devices=devices, - momentum=0.9) - (cov_update_thunks, - inv_update_thunks) = optimizer.make_vars_and_create_op_thunks() - - def make_update_op(update_thunks): - update_ops = [thunk() for thunk in update_thunks] - return tf.group(*update_ops) - - cov_update_op = make_update_op(cov_update_thunks) - with tf.control_dependencies([cov_update_op]): - inverse_op = tf.cond( - tf.equal(tf.mod(g_step, _INVERT_EVERY), 0), - lambda: make_update_op(inv_update_thunks), tf.no_op) - with tf.control_dependencies([inverse_op]): - train_op = optimizer.minimize(loss, global_step=g_step) - - tf.logging.info("Starting training.") - with tf.train.MonitoredTrainingSession(config=session_config) as sess: - while not sess.should_stop(): - global_step_, loss_, accuracy_, _ = sess.run( - [g_step, loss, accuracy, train_op]) - - if global_step_ % _INVERT_EVERY == 0: - tf.logging.info("global_step: %d | loss: %f | accuracy: %s", - global_step_, loss_, accuracy_) - - -def train_mnist_distributed_sync_replicas(task_id, - is_chief, - num_worker_tasks, - num_ps_tasks, - master, - data_dir, - num_epochs, - op_strategy, - use_fake_data=False): - """Train a ConvNet on MNIST using Sync replicas optimizer. - - Args: - task_id: int. Integer in [0, num_worker_tasks). ID for this worker. - is_chief: `boolean`, `True` if the worker is chief worker. - num_worker_tasks: int. Number of workers in this distributed training setup. - num_ps_tasks: int. Number of parameter servers holding variables. - master: string. IP and port of TensorFlow runtime process. - data_dir: string. Directory to read MNIST examples from. - num_epochs: int. 
Number of passes to make over the training set. - op_strategy: `string`, Strategy to run the covariance and inverse - ops. If op_strategy == `chief_worker` then covariance and inverse - update ops are run on chief worker otherwise they are run on dedicated - workers. - - use_fake_data: bool. If True, generate a synthetic dataset. - - Returns: - accuracy of model on the final minibatch of training data. - - Raises: - ValueError: If `op_strategy` not in ["chief_worker", "dedicated_workers"]. - """ - # Load a dataset. - tf.logging.info("Loading MNIST into memory.") - examples, labels = mnist.load_mnist( - data_dir, - num_epochs=num_epochs, - batch_size=128, - use_fake_data=use_fake_data, - flatten_images=False) - - # Build a ConvNet. - layer_collection = lc.LayerCollection() - with tf.device(tf.train.replica_device_setter(num_ps_tasks)): - loss, accuracy = build_model( - examples, labels, num_labels=10, layer_collection=layer_collection) - - # Fit model. - checkpoint_dir = None if data_dir is None else os.path.join(data_dir, "kfac") - if op_strategy == "chief_worker": - return distributed_grads_only_and_ops_chief_worker( - task_id, is_chief, num_worker_tasks, num_ps_tasks, master, - checkpoint_dir, loss, accuracy, layer_collection) - elif op_strategy == "dedicated_workers": - return distributed_grads_and_ops_dedicated_workers( - task_id, is_chief, num_worker_tasks, num_ps_tasks, master, - checkpoint_dir, loss, accuracy, layer_collection) - else: - raise ValueError("Only supported op strategies are : {}, {}".format( - "chief_worker", "dedicated_workers")) - - -if __name__ == "__main__": - tf.app.run() diff --git a/tensorflow/contrib/kfac/examples/convnet_mnist_distributed_main.py b/tensorflow/contrib/kfac/examples/convnet_mnist_distributed_main.py deleted file mode 100644 index b4c2d4a9e9..0000000000 --- a/tensorflow/contrib/kfac/examples/convnet_mnist_distributed_main.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Train a ConvNet on MNIST using K-FAC. - -Distributed training with sync replicas optimizer. See -`convnet.train_mnist_distributed_sync_replicas` for details. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -from absl import flags -import tensorflow as tf - -from tensorflow.contrib.kfac.examples import convnet - -FLAGS = flags.FLAGS -flags.DEFINE_integer("task", -1, "Task identifier") -flags.DEFINE_string("data_dir", "/tmp/mnist", "local mnist dir") -flags.DEFINE_string( - "cov_inv_op_strategy", "chief_worker", - "In dist training mode run the cov, inv ops on chief or dedicated workers." 
-) -flags.DEFINE_string("master", "local", "Session master.") -flags.DEFINE_integer("ps_tasks", 2, - "Number of tasks in the parameter server job.") -flags.DEFINE_integer("replicas_to_aggregate", 5, - "Number of replicas to aggregate.") -flags.DEFINE_integer("worker_replicas", 5, "Number of replicas in worker job.") -flags.DEFINE_integer("num_epochs", None, "Number of epochs.") - - -def _is_chief(): - """Determines whether a job is the chief worker.""" - if "chief_worker" in FLAGS.brain_jobs: - return FLAGS.brain_job_name == "chief_worker" - else: - return FLAGS.task == 0 - - -def main(unused_argv): - _ = unused_argv - convnet.train_mnist_distributed_sync_replicas( - FLAGS.task, _is_chief(), FLAGS.worker_replicas, FLAGS.ps_tasks, - FLAGS.master, FLAGS.data_dir, FLAGS.num_epochs, FLAGS.cov_inv_op_strategy) - -if __name__ == "__main__": - tf.app.run(main=main) diff --git a/tensorflow/contrib/kfac/examples/convnet_mnist_multi_tower_main.py b/tensorflow/contrib/kfac/examples/convnet_mnist_multi_tower_main.py deleted file mode 100644 index 4249bf8a8d..0000000000 --- a/tensorflow/contrib/kfac/examples/convnet_mnist_multi_tower_main.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Train a ConvNet on MNIST using K-FAC. - -Multi tower training mode. See `convnet.train_mnist_multitower` for details. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -from absl import flags -import tensorflow as tf - -from tensorflow.contrib.kfac.examples import convnet - -FLAGS = flags.FLAGS -flags.DEFINE_string("data_dir", "/tmp/multitower_1/mnist", "local mnist dir") -flags.DEFINE_integer("num_towers", 2, - "Number of towers for multi tower training.") - - -def main(unused_argv): - _ = unused_argv - assert FLAGS.num_towers > 1 - devices = ["/gpu:{}".format(tower_id) for tower_id in range(FLAGS.num_towers)] - convnet.train_mnist_multitower( - FLAGS.data_dir, - num_epochs=200, - num_towers=FLAGS.num_towers, - devices=devices) - - -if __name__ == "__main__": - tf.app.run(main=main) diff --git a/tensorflow/contrib/kfac/examples/convnet_mnist_single_main.py b/tensorflow/contrib/kfac/examples/convnet_mnist_single_main.py deleted file mode 100644 index 2c1f099360..0000000000 --- a/tensorflow/contrib/kfac/examples/convnet_mnist_single_main.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Train a ConvNet on MNIST using K-FAC. - -Train on single machine. See `convnet.train_mnist_single_machine` for details. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -from absl import flags -import tensorflow as tf - -from tensorflow.contrib.kfac.examples import convnet - -FLAGS = flags.FLAGS -flags.DEFINE_string("data_dir", "/tmp/mnist", "local mnist dir") - - -def main(unused_argv): - convnet.train_mnist_single_machine(FLAGS.data_dir, num_epochs=200) - - -if __name__ == "__main__": - tf.app.run(main=main) diff --git a/tensorflow/contrib/kfac/examples/mlp.py b/tensorflow/contrib/kfac/examples/mlp.py deleted file mode 100644 index ea2b252a05..0000000000 --- a/tensorflow/contrib/kfac/examples/mlp.py +++ /dev/null @@ -1,354 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Train an MLP on MNIST using K-FAC. - -This library fits a 3-layer, tanh-activated MLP on MNIST using K-FAC. After -~25k steps, this should reach perfect accuracy on the training set. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from tensorflow.contrib.kfac.examples import mnist - -lc = tf.contrib.kfac.layer_collection -opt = tf.contrib.kfac.optimizer - -__all__ = [ - "fc_layer", - "train_mnist", - "train_mnist_multitower", -] - - -def fc_layer(layer_id, inputs, output_size): - """Builds a fully connected layer. - - Args: - layer_id: int. Integer ID for this layer's variables. - inputs: Tensor of shape [num_examples, input_size]. Each row corresponds - to a single example. - output_size: int. Number of output dimensions after fully connected layer. - - Returns: - preactivations: Tensor of shape [num_examples, output_size]. Values of the - layer immediately before the activation function. - activations: Tensor of shape [num_examples, output_size]. Values of the - layer immediately after the activation function. - params: Tuple of (weights, bias), parameters for this layer. - """ - # TODO(b/67004004): Delete this function and rely on tf.layers exclusively. - layer = tf.layers.Dense( - output_size, - kernel_initializer=tf.random_normal_initializer(), - name="fc_%d" % layer_id) - preactivations = layer(inputs) - activations = tf.nn.tanh(preactivations) - - # layer.weights is a list. This converts it a (hashable) tuple. - return preactivations, activations, (layer.kernel, layer.bias) - - -def build_model(examples, labels, num_labels, layer_collection): - """Builds an MLP classification model. - - Args: - examples: Tensor of shape [num_examples, num_features]. Represents inputs of - model. - labels: Tensor of shape [num_examples]. Contains integer IDs to be predicted - by softmax for each example. - num_labels: int. Number of distinct values 'labels' can take on. - layer_collection: LayerCollection instance describing model architecture. - - Returns: - loss: 0-D Tensor representing loss to be minimized. 
- accuracy: 0-D Tensor representing model's accuracy. - """ - # Build an MLP. For each layer, we'll keep track of the preactivations, - # activations, weights, and bias. - pre0, act0, params0 = fc_layer(layer_id=0, inputs=examples, output_size=128) - pre1, act1, params1 = fc_layer(layer_id=1, inputs=act0, output_size=64) - pre2, act2, params2 = fc_layer(layer_id=2, inputs=act1, output_size=32) - logits, _, params3 = fc_layer(layer_id=3, inputs=act2, output_size=num_labels) - loss = tf.reduce_mean( - tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=labels, logits=logits)) - accuracy = tf.reduce_mean( - tf.cast(tf.equal(labels, tf.argmax(logits, axis=1)), dtype=tf.float32)) - - # Register parameters. K-FAC needs to know about the inputs, outputs, and - # parameters of each layer and the logits powering the posterior probability - # over classes. - tf.logging.info("Building LayerCollection.") - layer_collection.register_fully_connected(params0, examples, pre0) - layer_collection.register_fully_connected(params1, act0, pre1) - layer_collection.register_fully_connected(params2, act1, pre2) - layer_collection.register_fully_connected(params3, act2, logits) - layer_collection.register_categorical_predictive_distribution( - logits, name="logits") - - return loss, accuracy - - -def minimize(loss, accuracy, layer_collection, num_towers, session_config=None): - """Minimize 'loss' with KfacOptimizer. - - Args: - loss: 0-D Tensor. Loss to be minimized. - accuracy: 0-D Tensor. Accuracy of classifier on current minibatch. - layer_collection: LayerCollection instance. Describes layers in model. - num_towers: int. Number of CPUs to split minibatch across. - session_config: tf.ConfigProto. Configuration for tf.Session(). - - Returns: - accuracy of classifier on final minibatch. - """ - devices = tuple("/cpu:%d" % tower_id for tower_id in range(num_towers)) - - # Train with K-FAC. We'll use a decreasing learning rate that's cut in 1/2 - # every 10k iterations. 
- tf.logging.info("Building KFAC Optimizer.") - global_step = tf.train.get_or_create_global_step() - optimizer = opt.KfacOptimizer( - learning_rate=tf.train.exponential_decay( - 0.00002, global_step, 10000, 0.5, staircase=True), - cov_ema_decay=0.95, - damping=0.0005, - layer_collection=layer_collection, - momentum=0.99, - placement_strategy="round_robin", - cov_devices=devices, - inv_devices=devices) - - (cov_update_thunks, - inv_update_thunks) = optimizer.make_vars_and_create_op_thunks() - - def make_update_op(update_thunks): - update_ops = [thunk() for thunk in update_thunks] - return tf.group(*update_ops) - - # TODO(b/78537047): change (some) examples to use PeriodicInvCovUpdateKfacOpt - # once that gets moved over? Could still leave more advanced examples as they - # are (e.g. train_mnist_estimator in this file) - - cov_update_op = make_update_op(cov_update_thunks) - with tf.control_dependencies([cov_update_op]): - # We update the inverses only every 20 iterations. - inverse_op = tf.cond( - tf.equal(tf.mod(global_step, 100), 0), - lambda: make_update_op(inv_update_thunks), tf.no_op) - with tf.control_dependencies([inverse_op]): - train_op = optimizer.minimize(loss, global_step=global_step) - - tf.logging.info("Starting training.") - with tf.train.MonitoredTrainingSession(config=session_config) as sess: - while not sess.should_stop(): - global_step_, loss_, accuracy_, _ = sess.run( - [global_step, loss, accuracy, train_op]) - - if global_step_ % 100 == 0: - tf.logging.info("global_step: %d | loss: %f | accuracy: %f", - global_step_, loss_, accuracy_) - - return accuracy_ - - -def train_mnist(data_dir, num_epochs, use_fake_data=False): - """Train an MLP on MNIST. - - Args: - data_dir: string. Directory to read MNIST examples from. - num_epochs: int. Number of passes to make over the training set. - use_fake_data: bool. If True, generate a synthetic dataset. - - Returns: - accuracy of model on the final minibatch of training data. - """ - # Load a dataset. 
- tf.logging.info("Loading MNIST into memory.") - examples, labels = mnist.load_mnist( - data_dir, - num_epochs=num_epochs, - batch_size=64, - flatten_images=True, - use_fake_data=use_fake_data) - - # Build an MLP. The model's layers will be added to the LayerCollection. - tf.logging.info("Building model.") - layer_collection = lc.LayerCollection() - loss, accuracy = build_model(examples, labels, 10, layer_collection) - - # Fit model. - minimize(loss, accuracy, layer_collection, 1) - - -def train_mnist_multitower(data_dir, - num_epochs, - num_towers, - use_fake_data=False): - """Train an MLP on MNIST, splitting the minibatch across multiple towers. - - Args: - data_dir: string. Directory to read MNIST examples from. - num_epochs: int. Number of passes to make over the training set. - num_towers: int. Number of CPUs to split minibatch across. - use_fake_data: bool. If True, generate a synthetic dataset. - - Returns: - accuracy of model on the final minibatch of training data. - """ - # Load a dataset. - tower_batch_size = 64 - batch_size = tower_batch_size * num_towers - tf.logging.info( - ("Loading MNIST into memory. Using batch_size = %d = %d towers * %d " - "tower batch size.") % (batch_size, num_towers, tower_batch_size)) - examples, labels = mnist.load_mnist( - data_dir, - num_epochs=num_epochs, - batch_size=batch_size, - flatten_images=True, - use_fake_data=use_fake_data) - - # Split minibatch across towers. - examples = tf.split(examples, num_towers) - labels = tf.split(labels, num_towers) - - # Build an MLP. Each tower's layers will be added to the LayerCollection. - layer_collection = lc.LayerCollection() - tower_results = [] - for tower_id in range(num_towers): - with tf.device("/cpu:%d" % tower_id): - with tf.name_scope("tower%d" % tower_id): - with tf.variable_scope(tf.get_variable_scope(), reuse=(tower_id > 0)): - tf.logging.info("Building tower %d." 
% tower_id) - tower_results.append( - build_model(examples[tower_id], labels[tower_id], 10, - layer_collection)) - losses, accuracies = zip(*tower_results) - - # Average across towers. - loss = tf.reduce_mean(losses) - accuracy = tf.reduce_mean(accuracies) - - # Fit model. - session_config = tf.ConfigProto( - allow_soft_placement=False, device_count={ - "CPU": num_towers - }) - return minimize( - loss, accuracy, layer_collection, num_towers, - session_config=session_config) - - -def train_mnist_estimator(data_dir, num_epochs, use_fake_data=False): - """Train an MLP on MNIST using tf.estimator. - - Args: - data_dir: string. Directory to read MNIST examples from. - num_epochs: int. Number of passes to make over the training set. - use_fake_data: bool. If True, generate a synthetic dataset. - - Returns: - accuracy of model on the final minibatch of training data. - """ - - # Load a dataset. - def input_fn(): - tf.logging.info("Loading MNIST into memory.") - return mnist.load_mnist( - data_dir, - num_epochs=num_epochs, - batch_size=64, - flatten_images=True, - use_fake_data=use_fake_data) - - def model_fn(features, labels, mode, params): - """Model function for MLP trained with K-FAC. - - Args: - features: Tensor of shape [batch_size, input_size]. Input features. - labels: Tensor of shape [batch_size]. Target labels for training. - mode: tf.estimator.ModeKey. Must be TRAIN. - params: ignored. - - Returns: - EstimatorSpec for training. - - Raises: - ValueError: If 'mode' is anything other than TRAIN. - """ - del params - - if mode != tf.estimator.ModeKeys.TRAIN: - raise ValueError("Only training is supposed with this API.") - - # Build a ConvNet. - layer_collection = lc.LayerCollection() - loss, accuracy = build_model( - features, labels, num_labels=10, layer_collection=layer_collection) - - # Train with K-FAC. 
- global_step = tf.train.get_or_create_global_step() - optimizer = opt.KfacOptimizer( - learning_rate=tf.train.exponential_decay( - 0.00002, global_step, 10000, 0.5, staircase=True), - cov_ema_decay=0.95, - damping=0.0001, - layer_collection=layer_collection, - momentum=0.99) - - (cov_update_thunks, - inv_update_thunks) = optimizer.make_vars_and_create_op_thunks() - - def make_update_op(update_thunks): - update_ops = [thunk() for thunk in update_thunks] - return tf.group(*update_ops) - - def make_batch_executed_op(update_thunks, batch_size=1): - return tf.group(*tf.contrib.kfac.utils.batch_execute( - global_step, update_thunks, batch_size=batch_size)) - - # Run cov_update_op every step. Run 1 inv_update_ops per step. - cov_update_op = make_update_op(cov_update_thunks) - with tf.control_dependencies([cov_update_op]): - # But make sure to execute all the inverse ops on the first step - inverse_op = tf.cond(tf.equal(global_step, 0), - lambda: make_update_op(inv_update_thunks), - lambda: make_batch_executed_op(inv_update_thunks)) - with tf.control_dependencies([inverse_op]): - train_op = optimizer.minimize(loss, global_step=global_step) - - # Print metrics every 5 sec. - hooks = [ - tf.train.LoggingTensorHook( - { - "loss": loss, - "accuracy": accuracy - }, every_n_secs=5), - ] - return tf.estimator.EstimatorSpec( - mode=mode, loss=loss, train_op=train_op, training_hooks=hooks) - - run_config = tf.estimator.RunConfig( - model_dir="/tmp/mnist", save_checkpoints_steps=1, keep_checkpoint_max=100) - - # Train until input_fn() is empty with Estimator. This is a prerequisite for - # TPU compatibility. 
- estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config) - estimator.train(input_fn=input_fn) diff --git a/tensorflow/contrib/kfac/examples/mlp_mnist_main.py b/tensorflow/contrib/kfac/examples/mlp_mnist_main.py deleted file mode 100644 index 9c34ade1d2..0000000000 --- a/tensorflow/contrib/kfac/examples/mlp_mnist_main.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Train an MLP on MNIST using K-FAC. - -See mlp.py for details. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys - -import tensorflow as tf - -from tensorflow.contrib.kfac.examples import mlp - -FLAGS = None - - -def main(argv): - _ = argv - if FLAGS.use_estimator: - if FLAGS.num_towers != 1: - raise ValueError("Only 1 device supported in tf.estimator example.") - mlp.train_mnist_estimator(FLAGS.data_dir, num_epochs=200) - elif FLAGS.num_towers > 1: - mlp.train_mnist_multitower( - FLAGS.data_dir, num_epochs=200, num_towers=FLAGS.num_towers) - else: - mlp.train_mnist(FLAGS.data_dir, num_epochs=200) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--data_dir", - type=str, - default="/tmp/mnist", - help="Directory to store dataset in.") - parser.add_argument( - "--num_towers", - type=int, - default=1, - help="Number of CPUs to split minibatch across.") - parser.add_argument( - "--use_estimator", - action="store_true", - help="Use tf.estimator API to train.") - FLAGS, unparsed = parser.parse_known_args() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/kfac/examples/mnist.py b/tensorflow/contrib/kfac/examples/mnist.py deleted file mode 100644 index 547c4ab25d..0000000000 --- a/tensorflow/contrib/kfac/examples/mnist.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities for loading MNIST into TensorFlow.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -__all__ = [ - 'load_mnist', -] - - -def load_mnist(data_dir, - num_epochs, - batch_size, - flatten_images=True, - use_fake_data=False): - """Loads MNIST dataset into memory. - - Args: - data_dir: string. Directory to read MNIST examples from. - num_epochs: int. Number of passes to make over the dataset. - batch_size: int. Number of examples per minibatch. - flatten_images: bool. If True, [28, 28, 1]-shaped images are flattened into - [784]-shaped vectors. - use_fake_data: bool. If True, generate a synthetic dataset rather than - reading MNIST in. - - Returns: - examples: Tensor of shape [batch_size, 784] if 'flatten_images' is - True, else [batch_size, 28, 28, 1]. Each row is one example. - Values in [0, 1]. - labels: Tensor of shape [batch_size]. Indices of integer corresponding to - each example. Values in {0...9}. 
- """ - if use_fake_data: - rng = np.random.RandomState(42) - num_examples = batch_size * 4 - images = rng.rand(num_examples, 28 * 28) - if not flatten_images: - images = np.reshape(images, [num_examples, 28, 28, 1]) - labels = rng.randint(10, size=num_examples) - else: - mnist_data = tf.contrib.learn.datasets.mnist.read_data_sets( - data_dir, reshape=flatten_images) - num_examples = len(mnist_data.train.labels) - images = mnist_data.train.images - labels = mnist_data.train.labels - - dataset = tf.data.Dataset.from_tensor_slices((np.asarray( - images, dtype=np.float32), np.asarray(labels, dtype=np.int64))) - return (dataset.repeat(num_epochs).shuffle(num_examples).batch(batch_size) - .make_one_shot_iterator().get_next()) diff --git a/tensorflow/contrib/kfac/examples/tests/BUILD b/tensorflow/contrib/kfac/examples/tests/BUILD deleted file mode 100644 index ede7f183fe..0000000000 --- a/tensorflow/contrib/kfac/examples/tests/BUILD +++ /dev/null @@ -1,52 +0,0 @@ -package(default_visibility = ["//visibility:private"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -load("//tensorflow:tensorflow.bzl", "py_test") - -py_test( - name = "mlp_test", - size = "large", - srcs = ["mlp_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "notsan", - ], - deps = [ - "//tensorflow:tensorflow_py", - "//tensorflow/contrib/kfac/examples:mlp", - "//third_party/py/numpy", - ], -) - -py_test( - name = "convnet_test", - size = "large", - srcs = ["convnet_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_pip", - "notsan", - ], - deps = [ - "//tensorflow:tensorflow_py", - "//tensorflow/contrib/kfac", - "//tensorflow/contrib/kfac/examples:convnet", - "//third_party/py/numpy", - ], -) - -py_test( - name = "mnist_test", - srcs = ["mnist_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ - "//tensorflow:tensorflow_py", - "//tensorflow/contrib/kfac/examples:mnist", - "//third_party/py/numpy", - ], -) diff --git 
a/tensorflow/contrib/kfac/examples/tests/convnet_test.py b/tensorflow/contrib/kfac/examples/tests/convnet_test.py deleted file mode 100644 index adecda7166..0000000000 --- a/tensorflow/contrib/kfac/examples/tests/convnet_test.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for convnet.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from tensorflow.contrib.kfac import layer_collection as lc -from tensorflow.contrib.kfac.examples import convnet - - -class ConvNetTest(tf.test.TestCase): - - def testConvLayer(self): - with tf.Graph().as_default(): - pre, act, (w, b) = convnet.conv_layer( - layer_id=1, - inputs=tf.zeros([5, 3, 3, 2]), - kernel_size=3, - out_channels=5) - self.assertShapeEqual(np.zeros([5, 3, 3, 5]), pre) - self.assertShapeEqual(np.zeros([5, 3, 3, 5]), act) - self.assertShapeEqual(np.zeros([3, 3, 2, 5]), tf.convert_to_tensor(w)) - self.assertShapeEqual(np.zeros([5]), tf.convert_to_tensor(b)) - self.assertIsInstance(w, tf.Variable) - self.assertIsInstance(b, tf.Variable) - self.assertIn("conv_1", w.op.name) - self.assertIn("conv_1", b.op.name) - - def testMaxPoolLayer(self): - with tf.Graph().as_default(): - act = convnet.max_pool_layer( - layer_id=1, 
inputs=tf.zeros([5, 6, 6, 2]), kernel_size=5, stride=3) - self.assertShapeEqual(np.zeros([5, 2, 2, 2]), act) - self.assertEqual(act.op.name, "pool_1/pool") - - def testLinearLayer(self): - with tf.Graph().as_default(): - act, (w, b) = convnet.linear_layer( - layer_id=1, inputs=tf.zeros([5, 20]), output_size=5) - self.assertShapeEqual(np.zeros([5, 5]), act) - self.assertShapeEqual(np.zeros([20, 5]), tf.convert_to_tensor(w)) - self.assertShapeEqual(np.zeros([5]), tf.convert_to_tensor(b)) - self.assertIsInstance(w, tf.Variable) - self.assertIsInstance(b, tf.Variable) - self.assertIn("fc_1", w.op.name) - self.assertIn("fc_1", b.op.name) - - def testBuildModel(self): - with tf.Graph().as_default(): - x = tf.placeholder(tf.float32, [None, 6, 6, 3]) - y = tf.placeholder(tf.int64, [None]) - layer_collection = lc.LayerCollection() - loss, accuracy = convnet.build_model( - x, y, num_labels=5, layer_collection=layer_collection) - - # Ensure layers and logits were registered. - self.assertEqual(len(layer_collection.fisher_blocks), 3) - self.assertEqual(len(layer_collection.losses), 1) - - # Ensure inference doesn't crash. - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - feed_dict = { - x: np.random.randn(10, 6, 6, 3).astype(np.float32), - y: np.random.randint(5, size=10).astype(np.int64), - } - sess.run([loss, accuracy], feed_dict=feed_dict) - - def _build_toy_problem(self): - """Construct a toy linear regression problem. - - Initial loss should be, - 2.5 = 0.5 * (1^2 + 2^2) - - Returns: - loss: 0-D Tensor representing loss to be minimized. - accuracy: 0-D Tensors representing model accuracy. - layer_collection: LayerCollection instance describing model architecture. 
- """ - x = np.asarray([[1.], [2.]]).astype(np.float32) - y = np.asarray([1., 2.]).astype(np.float32) - x, y = (tf.data.Dataset.from_tensor_slices((x, y)) - .repeat(100).batch(2).make_one_shot_iterator().get_next()) - w = tf.get_variable("w", shape=[1, 1], initializer=tf.zeros_initializer()) - y_hat = tf.matmul(x, w) - loss = tf.reduce_mean(0.5 * tf.square(y_hat - y)) - accuracy = loss - - layer_collection = lc.LayerCollection() - layer_collection.register_fully_connected(params=w, inputs=x, outputs=y_hat) - layer_collection.register_normal_predictive_distribution(y_hat) - - return loss, accuracy, layer_collection - - def testMinimizeLossSingleMachine(self): - with tf.Graph().as_default(): - loss, accuracy, layer_collection = self._build_toy_problem() - accuracy_ = convnet.minimize_loss_single_machine( - loss, accuracy, layer_collection, device="/cpu:0") - self.assertLess(accuracy_, 2.0) - - def testMinimizeLossDistributed(self): - with tf.Graph().as_default(): - loss, accuracy, layer_collection = self._build_toy_problem() - accuracy_ = convnet.distributed_grads_only_and_ops_chief_worker( - task_id=0, - is_chief=True, - num_worker_tasks=1, - num_ps_tasks=0, - master="", - checkpoint_dir=None, - loss=loss, - accuracy=accuracy, - layer_collection=layer_collection) - self.assertLess(accuracy_, 2.0) - - def testTrainMnistSingleMachine(self): - with tf.Graph().as_default(): - # Ensure model training doesn't crash. - # - # Ideally, we should check that accuracy increases as the model converges, - # but there are too few parameters for the model to effectively memorize - # the training set the way an MLP can. - convnet.train_mnist_single_machine( - data_dir=None, num_epochs=1, use_fake_data=True, device="/cpu:0") - - def testTrainMnistMultitower(self): - with tf.Graph().as_default(): - # Ensure model training doesn't crash. 
- convnet.train_mnist_multitower( - data_dir=None, num_epochs=1, num_towers=2, use_fake_data=True) - - def testTrainMnistDistributed(self): - with tf.Graph().as_default(): - # Ensure model training doesn't crash. - convnet.train_mnist_distributed_sync_replicas( - task_id=0, - is_chief=True, - num_worker_tasks=1, - num_ps_tasks=0, - master="", - data_dir=None, - num_epochs=2, - op_strategy="chief_worker", - use_fake_data=True) - - -if __name__ == "__main__": - tf.test.main() diff --git a/tensorflow/contrib/kfac/examples/tests/mlp_test.py b/tensorflow/contrib/kfac/examples/tests/mlp_test.py deleted file mode 100644 index 22da6c29f1..0000000000 --- a/tensorflow/contrib/kfac/examples/tests/mlp_test.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for mlp.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from tensorflow.contrib.kfac.examples import mlp - - -class MlpTest(tf.test.TestCase): - - def testFcLayer(self): - with tf.Graph().as_default(): - pre, act, (w, b) = mlp.fc_layer( - layer_id=1, inputs=tf.zeros([5, 3]), output_size=10) - self.assertShapeEqual(np.zeros([5, 10]), pre) - self.assertShapeEqual(np.zeros([5, 10]), act) - self.assertShapeEqual(np.zeros([3, 10]), tf.convert_to_tensor(w)) - self.assertShapeEqual(np.zeros([10]), tf.convert_to_tensor(b)) - self.assertIsInstance(w, tf.Variable) - self.assertIsInstance(b, tf.Variable) - self.assertIn("fc_1/", w.op.name) - self.assertIn("fc_1/", b.op.name) - - def testTrainMnist(self): - with tf.Graph().as_default(): - # Ensure model training doesn't crash. - # - # Ideally, we should check that accuracy increases as the model converges, - # but that takes a non-trivial amount of compute. - mlp.train_mnist(data_dir=None, num_epochs=1, use_fake_data=True) - - def testTrainMnistMultitower(self): - with tf.Graph().as_default(): - # Ensure model training doesn't crash. - mlp.train_mnist_multitower( - data_dir=None, num_epochs=1, num_towers=2, use_fake_data=True) - - def testTrainMnistEstimator(self): - with tf.Graph().as_default(): - # Ensure model training doesn't crash. - mlp.train_mnist_estimator(data_dir=None, num_epochs=1, use_fake_data=True) - - -if __name__ == "__main__": - tf.test.main() diff --git a/tensorflow/contrib/kfac/examples/tests/mnist_test.py b/tensorflow/contrib/kfac/examples/tests/mnist_test.py deleted file mode 100644 index 92f8462357..0000000000 --- a/tensorflow/contrib/kfac/examples/tests/mnist_test.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for mnist.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from tensorflow.contrib.kfac.examples import mnist - - -class MnistTest(tf.test.TestCase): - - def testValues(self): - """Ensure values are in their expected range.""" - with tf.Graph().as_default(): - examples, labels = mnist.load_mnist( - data_dir=None, num_epochs=1, batch_size=64, use_fake_data=True) - - with self.test_session() as sess: - examples_, labels_ = sess.run([examples, labels]) - self.assertTrue(np.all((0 <= examples_) & (examples_ < 1))) - self.assertTrue(np.all((0 <= labels_) & (labels_ < 10))) - - def testFlattenedShapes(self): - """Ensure images are flattened into their appropriate shape.""" - with tf.Graph().as_default(): - examples, labels = mnist.load_mnist( - data_dir=None, - num_epochs=1, - batch_size=64, - flatten_images=True, - use_fake_data=True) - - with self.test_session() as sess: - examples_, labels_ = sess.run([examples, labels]) - self.assertEqual(examples_.shape, (64, 784)) - self.assertEqual(labels_.shape, (64,)) - - def testNotFlattenedShapes(self): - """Ensure non-flattened images are their appropriate shape.""" - with tf.Graph().as_default(): - examples, labels = mnist.load_mnist( - data_dir=None, - num_epochs=1, - 
batch_size=64, - flatten_images=False, - use_fake_data=True) - - with self.test_session() as sess: - examples_, labels_ = sess.run([examples, labels]) - self.assertEqual(examples_.shape, (64, 28, 28, 1)) - self.assertEqual(labels_.shape, (64,)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/contrib/kfac/g3doc/autoencoder.png b/tensorflow/contrib/kfac/g3doc/autoencoder.png deleted file mode 100644 index 20f93c77034f3355653a6a260cccdad29c080eaf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 54204 zcmeAS@N?(olHy`uVBq!ia0y~yVB%$9U=rqFV_;xtIig?Bz@Wh3>Eakt5%*>;dxgkR z>0|LiK?)8H69Syx@BMy{g=L9yS>S)A6*mH0ax^-cLMNdn`}SVkJH7iqeoT0>?9HFFyk~dw_su+6{Pq%}ZAPi5Zp`~Mzy9CB|Np+*Ptl8&lCS+D$R(~fCzE&4qD2>$dQYEi zcY3lqzg+bjLz{m;9>27^{D_^MJ^Aaas~Q0d9G3k&cJ%1O$^Lc+-|zq5x7fX3NmbR= z)_Zl>+JgUozptKes~WSvPIi9%Kg+s5AKm9z6f)J-)pc}qIP9(Z8uC|JS^2WkE`Z=BYt|izWc{VN3TBj zxBqKW_xEe~)pf5>HGe)Hzp*uYy6(HF+TkHL-)JxQ zn;VomT_cc*SIXpo@p+r(x3{-He%LO5Zkq7?>d;&FFgg#l^)}rw0r3 zMsLeGI6=`lU}sV4nVH7wr#t7CUUFezb=qC__EP9%;bt?-4Ea5u&soo~EM7LtCa(T( zDUXcB2Q`!Qb8`X@H3+}j`~BW(w*Px7KR-Iq$o%8q@Ap@i95{F=m`Q!f+UV_CX{E2O zI3^`2Sv)K@s`+$M{c6jK*6_GWSKlTP=DF78VKouwOJ840{P*W)*YOpBi$h9$O>90M z5ze@~%XHG@$%P*uxvq)dFK7SfL$i>YNQtDYr(eOsj0a23ZQ z$=Fl~h%|p&)HwN<{{BCUejQGqU)!eUJL|?J!+RSN4}W;O{r)W%=F zQ^V|QI$xj7h`r!)I68lCt6l9ci#06wCv=NwHWfcVw=n1CrXQb9>*sqFKc8E!W$4rx zuqox_Y`g!D`|U5R3|4PX-Kl(Np>z9%#qRv3foWgw>RD`$VSh`jtJes5K2>b!07 z%L>0R%z5&D-|xIV|Ns5YI;pVx!1+ws3)!lLCOZq0<{71S&C0l$7RefC;N30gDsg{P zx0r6%y4c-Xe78QatyPX;T9DfE_T1BHGiFG9-5wraD=MlLa$;NFtc)49)!U-DB6b!n zJsV?}d&}hOv5Nb(-!+SC-7X$5Kec>bm6vX4z=|v}ZQE&VdYrcZ|9pc02W`?Ax2jttMYD zdF$U?qZ#!x$G9tNpYQb*fz0~*ek|Iw=YrEx-Nh-YQZxGI%UYNDL~_kcdvapp%_!bT zz4_~Fe}7wLzfm{yYqI59%cMiORe- zIURa=neWxlVXHzs^UWtzhzSZ@Oc0FNd^2lNBeU%O1!rfQAMcSgKJc~J)=`-E@$r85 z_xJW*Ry~<9Y3a?s&*#@4TNS!`%F>15>*KP{-oN+$;ob84q4AIQ|NG^=*4tV-L-XM! 
zHdfEgX=igzZavwmmd(c@Ge=^2v)o^03&)@h7orX^M&)<(_J&TDIm$3yP{YSUGo9yn zzx;9C?ROTbrSko$YFd1;Vx7CjAHA3z3{e+8JwLzxQr*X+;u7Y0b2h0>l>b|m;hSimIy+rm%Hrgv zoNbKz&oB3%f9%T2;HjZqKhwm2)QGUWVduB|p`fvCro3+5o9iOi-aMZ3MpNs@-sKeu z3w9*#Uv#wH+jfh^Mw@Tu+w^bGZ*+2d&A&zH<8Swg2j46I)?#*Lu65Kn644*D)N5s` z*0aO)FZENV8OBFwglUFZlwHj4S~OMPq+49yZDZ2WFD0y<$%2f6ykd=}v)4Z3I9c<< z?7+#!=C*qaI@$QX)hxcsRDXYSdjH#UKZlsx&l{Wq4;}Dq^2@0yzQ}ZxTfL&%-F)x) zMMobeoI5y)d6t-kLvg@mH5ERQ?f0ry3-P8g&-KW(U6{`#IYC)HeP3iTXPe{Z1u5Hl z4j()hc-=8O zAI8OzQZw81Mb_x+Dn%`bc%0(*aYAH*?M*ci$t?_@c1mi@(X_J2On;i-d(mOOLUp|u zmxP`Cao(s{i5{ag+02g%ITjst=N4&>`L!hCv9ZSqOC}|$qQ2T~s@aPgn?=~)o+-02 zVCG}^w#Z?#QpTc2ztl5_?Jh9-aGS^n3tTYlWmaasUuW5QN#xwg{)3{68uLR<&KEs; z6fLXn=chC$VyDePgQO!IUoF|gx+MKNZncs~Sc^)plZH8-g{)9_EFpi#g;nf*c&3(R?m5%;*By`%fos{s?5`7DMt+*HfyCEn=QQUTkWR*bDA2%1l}G! ztdV@y>v)~_wWtRY$5jQoWV-@6E!`bloX>46*O0P^D7^4Yn)&)(%Y|uo79Be{Cws>& zg#+7JXNi6NW%41n`1`xNt7OhjI(Tx9UyDipC6Ut!WtB~p8IzmLUR+{1f3R_K*R~P| z{%XFa8o61@KaU+|>=a&cZ|Tf82ZAgLPBt9s6fO%pEi5AK-X{|n`LXh+PL*r3*@r^j zYyZ|>`nS`h+sW;|;IW4f&Te;&QZTuA;MeBZtrI4uhLxV%vAQw$)XDawiTTS#en z(%!B-c8Hy2>=)zWOOrf5&EQsv zOh%;}!&-ixlmpKfoHX4$p>RQo${Cri1S8}eZsW&q#6MKGmH(Qk0 z3x3CJ?)ws%UDuVRe*6)Mev`k2=VR`*e{&}*y6cqJo!e$Uefo>j z4(Sn5>n&~;M4o!ll6LG;LjK;is} zvi9{VSzn`)Nn7IG%dX^@9Q?%7*<*K;*gS8{KDSPc_lx|(4~xIs92IVkyXfFw)@A3HAR=>f?&Sli zO#3R+N_M60zVSY2L%w@>H=AyLp6t_TvsSIMKipiM!|ZrPK5x-VeSZ1C+8wh>RUb!6 z_Wd%lH&>R*nES*}rFx>ZjfmsPD7wk|lGM@HfHw%pY|)8;&o%$WJW z<$P&YUiy-mvtL>U7X54y<8;3*W|?UizwY%Vr$s-_GoBXT7IS5p`|O9?F~=S?e=gOx zdsx?n7Ju4UUwrDvpP04>w++}c_HE2a5BTsiBW>BtyZ`=h`Q4ETIoYBXQ+2Vpvnk&$ zhWpw3TMJhE=G{_s$r1m3cJ-E<;7 z`DV$i#l{N1Im_Q{@ZrkacTrlDw-Wa{xJ^85ckG{^x8UCvjYS$6FRKrKyErXmM|IPquHBj% z8b3E3l9k(X;^zB#8-h$W{3~&LS|n2|^`rJl(W#%GpI^M7VvKq5`)xX|muQiZ?AjW(+u@~bNSzSva39-Ml<1MkGr?zTFX1nPF{A{d1diiqmy1H8{WRT zpJ)1Q%ZHCMw;nJ{zJKXl<^pBK-**46&yn10wY>4rfT@b}!lq-N z#| zzc%cP^}?ks)715C#%(8S^*Saom6~zaMpj;WzeBv<&qvC^|EHADfX6~tQ zkeVK|+cn3<7y(2;^W=i!Pfj?j)z)vgYX6(H_>Bk2Q4T1NLVwTb_scy 
z!!+;Q5+$jUrc)BViym$hDqj%oyU|3#Wrg-k9;RIeGrLrJd>>w(8X@ueP>R*00<+!= ziEK)s0TvslSs4;7d8X2mXB;QJR8(kgkn?)>h$Ff9&zB`)$Eq}D_AAdhYg{s=xr5EU zWz&y2ix#GTyj_^R_8437gp?|d6ABC3C+)O2bVJ78c4g~EIj?7rInqD#$UaqAbWl)H zke7{fr`k*&rgaZz#OfW2kcjp_V5{m8zir;;4z}i=%*+>oH43|~XC)c1doF8l;9;uL znAxq|bN1uq$x#xo7pBV$(4p~}V~fchDMJR^Y;)5Y%_^6j9oQIcr<^$|c@7u_+*Me0n@aDD-PXIuuz}GnXH~Lv*VTb zocx_jb?biTE?u-S?{3zf-|s|xW`AQ;QtDc+d}Vunxrmksm;HX56O+~X=T+yKWL;UY z^ij>%tLZ;pEWR1B=ZE=|)6>sS%X@xKw&MTa%#Pl@JqHdbTv-t)>~Hr|hl!b4_LBI; z2Lgd*)7t*Mh}=E%pp42)Ii`Ik94}Vp`yCNHb}d-^*frxc&V5IAdQUHl*p|a;mV2wg zbMn0-S604Xm~t}6yz$NT_3Zn8B*}ex&cE~JW_I~K>_0z0cV8d(_rg+d^RGXTc8h;F zq0FCfXGiCXINP^UE%yKa$o5K^x@BY}w)3hkI;gm4jjQwa5C3+o?dxD`<|tMDywuSl zytP!~8bAhkq~Z_A73-^f0nz zN!f7K`DBbtVou-uc~=fa@T-0aofGeJ6wx!cGg9-?wF$tnTmLDWv-Gob}{@jY+CsUR@1$X|CS&=E@=VAKvXYPOUs- z8*5m6vA}!dz8AsUUHxpG)@fXy&c^7UYmk^zdzvHpwdC(v4A;v1j~qMp=;LwuV{fH7vT#Yp0eRy_8@ZoA^fW*giF>Eq8_2T;Exo{_)3S=MP7O zl`rfnySWaKTe@>^R@Y6LD~VdLE;ZC(Da`TM=uZQ9{Uj87b!*?J9X zeq7%4^wP|8zka{ppM11Sw4=M*`TxJa7gvRT^Ha)Pw2=STmF$;3hC`oIqZ7H{Yx!$Lzrz`8|@N|poZwrjvS)_Wb zU%p@3JWr$h&;&*2OMP_{y6>zd z(^R9j6ueoku;}2gb0>vT1UAguY#?^*ns9+X=cVaupTF6>sPTp0Je$si&h58c4sXl7 zZIFD7=gdrF_la|DtGE4{ey#1i{eKy5aXpv0R;5#>CSF+JXr;ru_rm_V?AOtn;?otH z8@~4H_q1;O%5o>T=}YQs%ja4bHrXw_RCRU(GO1SNr$tHUE?I^ULm* zw)4%s+BuOY_qLhq(^H{WG$&Mke&)J8&-Ti}02|j!q6e3|9_|!YH^{kRFx%hR%ZqDQ z>FZ@OiiZy$HY|8>prmiYIR?>-4|sGR7uMzq82BC9HRaF^t8G!|4&A6argLz*#rr!u zncwd<7rngveAwjroHwZ|YHFa7H@A{6sy#A47ybIZHT(J%-D&(bA2u}Q)N6$J`0yO- zk?gf9ef4l*yIi5C@{ctc5{5}E*Vf1LJ86flkpRtgY>W8Bpq_hLwcJ1DPSMKgTCUE! 
z57`!0AO5nu_@%G@$1j2(mO47b_uiHFmWVzdBN2W6#&ws&U8405ug8l&Ub}tSjR&#& z=Ph_Q>&L_P`GPVsb2LM$FCO4n?l(8&d+47(pXK#3S>}uK>Mfk?t0uym%l-Ypg0{D7 z^t8gC>o>3cdKR7tL~ASm9h|@6>m9!7VGA5xWWTJwV|?2t)3M^q#rA@~ze-KAu87RF zy1QuIfx5@NhCf~`*5BkaJJ7@+>HE96AD&LXzu04<(z4fziw-I{b+uhvxc$*NH(8!% z3Man^Y)BHg5S*9SrQEYNKQ}SQHhHxUuO(>Q@typQeYM(V`S&{H>th@6F-z`?_xXYBK#M@O-~(&LOPs7c3r>y2WIU zO(j#mjgf@?-!Hqpr5ts5*$jhBOf)ZWHqS1Dv<41LXJ@qiTJA2Bm}8p?b@{u>PqV!{ zyR6vk?d_-N#k$r0c-Stlp!ZIzsi~>(a|9hSf4?%V5oP1jpn=JVvd{Gx3!mp&bs-`v!6VN+`MoyzBX zv&>z0mz@n*8T7OyTCY-e+v=tcwq_MgsY!lehk7rEethWhG25WyVxm#J6pKulS-TQP z^6P`3itYETr3Y<~%+YssY5sgBgsZFViSg43BB9Lky{}eii(747a?hi6|NQ;yeh8i1 z{w=mTtNK(+8l*<)&M1&iE}xR}LgMB2dsW>1w%=wH{n+t--|r38-}Cl-yOsUo?(Xf@ z^DJ*0X-G+|d!O60UH(mz(!o~a-g>)#O5fb&L)5EsEjGLUjalZPc_(tO{V7qcW?AvF z$5BO0v!-QqtcI))u$7>fv$u`Xis7p1%4t zXt7&wP`lyByY6!$U~2)M&*7VuVenZ-gq@r3-%9xL-+#Su(afA$t+hN% z*UXor7%RHy;oI%^-PT5Jy|A~s{L9PB?y-q;FCP?Lq`2CuBjZPZ4-eC|?Tq7TM)m$?%L#w zfrl=b*B?L0T{zRq>6m(KuwOqblk=7R@$cvMZ(VNw;)Ljq^~V&RK7Oz&`ecddtA!I& z1UBqTY}shAZC*pQ*MxmJwjjf=h2M|iJntLbxHr&nhVdrmKYwK6t-U^Oc>1PHW>1ZX zyPo{v&eh8|)ieYts$STf6n=$!ed;^Ky*67xVYC1xj4m!pJUvZ!f&Jl+kB?tn8GdKe ziS6%8Utf##cXQ`loVMoBI_2pGuWy!doOu1m%rS7vdCBRtq(^p4)9peCt~hy zDN-yFylqdvV(Gi%Z)E4E%{l_^fI5WO$4Tk^iN!U>_x?_EgDmuPCuUM*8i&S{Y$}Afergw z(UU4OJKvG?`L)aR)+BZG_aA?7u-P#Go(;&{haWa}SA4pAH6y}QrvLlu#mcQ4H5P~W zAARcb_u1p)uWVapC0@Pa8nGp;MCUTw)$TCEunNBAlN=BHViP!a=tdROJ59%5o4(Gm zjWYRQcDiSJaOyYn*M_Nahjtckl(4E~^%CVixVm|E#6O9Ro#7u2z1rAzs&dD-y({Ds z=Wa0k5@02CQegq}q>~zlZm2lOo^0L7=b_i3G$)qfqNA9czLed+AIZ0ijvVb4pSm>h z6q~IeCx)_wyGk5bD&P|OyZo1nCkkY z8lPMd`6IS9Cj8!>+e(`pH19+wD+CoXU!SyoVL*aoZ021_Pc}wSsfL`O^`$x*_$KiD z{QP|TybfX20FyKEl^phKZQGu|&uYj@?Y5YBYt{SG84=q)TI~Dzm_hD!G<)x=*67$Jmw&$Rx9hN-&X;g=<9?ZCwL5Qa z)({j*>@NR!OI4<`*J*J_m1UXlb{;0a*=I{w5_6;$9_DG;=G$H|qJx*W#{`+Yv8WLYexB?8zfAPvV%N&M5AMnuYMMq`I_TMNWV4qksQZ3$ zuW6opl9$k}X?YK?xleaIP<02^FanRMxOcPNE3&NoloGNgB5S?g6i#-dvqzK1EYvs6-|5>`Y)I|dq?49r^2zH~WsGc(0c{X@pZ1Ghf>>wJ5A 
z`)b|kUteGUKBHRwM~(SZEt+g`g`>`Nujh|=d`o$A355Rv3zqz=&G5!9xrmNke&W^+XTl0FVhOF8V$;% zW?trO*?9Q`kE+1Awhfk3Pb_vlyeakchfk;V7ni-g<>BiadRjR7c%SR@b93jeS`s=j zGc!Tr=fNnIrAv}tJ)XBQ=h3NOmtK81xo-W$DYKSXN8c+BT$_=);UcI%o`0S_m=P-8N>5OrU(W#iYL z$fkmA^ESJ8x4pc)Tsi;V9tnelhN6}yVW*!wN%`}>{(tcP=clKiuX4IMWkF|~p?to> zm7A+Oc=!+AXrJ};h4IeKA7;FsZkT@j`G@bHj~|?&ysP`!Czpi>m~K=WzFl(uxwQR4 z8I76z+gqMJtgrmE;`|oJgh$I>T8LH4BuDRSSai^`K4$y7Q&o;Gk{N6BRX`mELEd-C zZ~7*Ey7NHieaecsy;ajREmpp{x%v1>nONyqVcy@178yQVz4pbG4JPm21zp|RTH%?O z?YHiD`0t)bxi_mLG`HCJ9iMx&`|Lc&hYJE`XI#kpT(jz6K#1m~vb=Vd>jD!v&zj0? zKJ6+IeLlja`M1RG#VXE=4$AVUE#vVunbX^MoO|=Xu*h1IRf}ip?6rFP@U%PQ6UUPi z9&vo*KlHfXHh-7hXSUh}+qPvbdib*KaKO%jS^eEGV_i)m&R-1Vyfhu$Cvn&J-%w@4 z`su<#=d9x{F)=ceCQS<1QIPoAwA_(zR>lIsrwchV#huR9?rT21O?uY@!D5qc=8Nqc z|Nim&vig#+`noxX7CG-v%KyT+eBR3Ht9=%twQ{%5y$^V~W6Q^y$?8|myX(0>OgNKR zqIdh9+OwEvH!9M!U$tI$E4Zhy{M4D;iKz>=?mOCw6vb@649?86jpl5%|M!EronQXg z>-GEl{Ox`osWP(iy5PdR*zU)*`C3P3u+LhM^RdY)==nK~FU>a!(z=BN+?+cnJkpqG z&vbnKoPg(^9|AO^%-jB(dZkwSe{Fm<I7`C6gPk zRh|~IetS{RWfScgLj?!`~L`)->==CEAD!Jp6ylN>GK5MxE?Nkzy9FS8(S97 zDE=(eI4}ee(8wGmX<9y}G)3 zhFR{cbvkcd53j2Cd-yp1QW<()+?Q`7hwFyIOFGe^&5qW=F7H7FaI|!y4fe=+V(x5k=+iq?}laW zo^39N5A)miTwLt_cxL*%z;t7ss4X1(|9s+3J3A}&rm_9cCzIP;oP&zGto))}nqSX) zcIB#ZMe<>hsfqvg{505jR$|Y-f4OtI_b2QU$h%upnLF!iV#eKr7Gl**ceP8xp1c1k zshGK|X0`Lo|9(G~)Vuv_sL|UWeYnImWRi+-Al=uhI$ zx4U%cp|Kasah6R|Ia}|F^c5HA9Xow?-q9OhCx<9SDKPzIDquFxIjwp4m0iOWY0Xmy zFWG(Aq|e1S&-7ugqngR6SUSws0yc zDoR)uE%7z^@DIF>(*0CN2S;hfl_{FR0V{)4rJL2~RWQBZ_j{e5+PBlf-xf96pB8G> zYvEH)HNUaRzh>ii>m#a16OXsrX2dNr{AaiMWa7MYZW%Fq?9Argou)UlERMy0WzSBz z)zgANUFeCu?bfqv`;JfNJo`&c;Mg@|v5ODvj=j@3xgq1kyN5Eenzf&vc=E_vT~U+T zmV3KR(l{;Q+#E}-=yzul7Hm~iQ)3HW?ss%o>FX&oPGmHmcw=eQ)iz`G^^a$IOtoIU z<~MbH`GM(O!t2Ya7REP%Tw=UB93%EAb@=XSVq#Q$_2OmtwY{m5%Jbqpy<5a8*0XJI zV*ABjaW<&UxJQ@yV~cx^&dCn zr~q1z?q{q1dZp~aw;@iQI-vR24+>0jiJ1|bQaG=ziF|Byal*w*4)?sTxhLL>XBYit zdA_{i{{C`}ih$^ftBZGhI;*H?$yoX6$I7`vca~0^Gpp&wkqDo=M#5VUXGLt$#+*;yN-ZSpS9Z$|bx}DJSratqIB}akK@`j2f?-HixeZS)UVBxLr3GdG7UvkwK 
zi4Z95zUs30(bo;1p4`7t^-g~48>5A1SHFA|yQl2rHlD|4u0B3!L-ACd`?Wvv}e6BW|ldrm9Lstoti; zwD#_*g|&Y@R?8n(|NOnp@xj!s`45v0sb26`)YFXfJ#}a6$CK~!1GLvz-E!S_;PwA` ze-G?7=sV`NbL(2APojr+X20Y=k`=T^k?p$I=UR;z6N&k`F&*2MMkn0or_Ga@BOyB=@yX`WaV2NzQdKJ@e@FLFr!`^*@N8 zyS9CidpB(OZp*x;gF>5xdCwHR{`Yx?o&8&{^fpUn>jUTY`ww0!N;is^V_Bzm?BSoS z-LFo(>iBc?!{>*o7DCdkw$uL!&wk1^&C%&pDbJc)-16^#c|{r(DQ)S?>AR}_|J;_0 zdj?WZKg9H zl@7LEX8fJ;eEN-=yxni}eZ5UW!h5TO7Iaw1mWSD1+Vbu3Roz8v__w!YHAee9p1l5M zoqWjF{$0oBd|kR@#@5HdU)Ef3&7HVbG|YGD^NIIt4$XdPlxO*M$*=TpIy<+@@7^8T zvia)~+bEMax3+ROv-7)k3aJW+h@>q3`1kw$XwZ0Dm5a06@)NtQ!X2LZ7_Z!yxe>lM>_C~&E z*F)3GoQm_pKOX-$_2sB4e<@9TD%p$ z*lGw!ENVGcy!x2L`8&)(ZERwPc7!c9{@QLKT+X-r(K&_r^#)ljp+_H{J|3_vVacxL zPabW}2+@kMyDW9#l%wLaR|o7rQnuK0JHP@yWHERj$rOURrH?_S(dz z6-6!CcWttxgS`6UjazOz-Ou={^z3bX%iHJWi>h0rFRYDmui5w9`U>~@R#Ek9E1t5w zV~MO!GmgtMV@kg6Zx*#A^<_kR?(;GwJ4-oxYYY44!kw~TU+=YF5WY0}qQWA+YjdLd z^S|jbOb@r3p1b^yVMKaVX`Rg;m$|o2?w7vls`U59rZoTQ8^KjvH=Ep^D?194x8&Zw zRwcydbwOKd#@C%QvL#H5V|N90iSQhHay&{>!}7bzM8^ZSwmq{HuKc8`tYRs$=icul zSA$mwMCo02dU<5Vlv!)~9zJXNeRAFUtCKhmOG__Ph`(1;vGr%M#MWP>U$=)Xowc&_ zhOqO>2|}lY7PgA1&Q7@dve(@96}Ruap!TZ2B`GW4ZvLIcbLiG(>45DKx?4JP*2&my zb>Ket`?kH?`L5V2(?WHhgxD{8@U!6g^9+$d@!qcM-;XRScxn7pBz=0=p~J?XGcI1( zQ(0VUA-sME9}iDR*L-t8@6_#bnlw=uNpVU?GX+nO55^03$JafWt};?qxTo2a|t(-W)GZAU*n zUcK>_^U+qhuGrJ1F4K!-H+*^$HK*4)fA`^*%TsG_lo)AqLk#P^A8!@%S4)0XuX{0&Hf>z+5N%$=kwo9oGu^V{GM+~?4svyK0c~? z^)9n8=HhhQwSM!y*~sZiRd}bSDH>fe=JP5FGufaweSO^V%m3r%uf6KaSAXhPwqkR` zS7$dr<7bxQ`FV4?_Edhlb?@K(OnbTFvYI|MRjCzgm(7{N&DQduz1jX|LjjZDzKKb; z3eBn^x-p!;llf9-Mg8vix@(F+uW$PHrQOHxs@#7U-_e(PYeD_g+Xoh(d6^%$En&$j zT8$VoZzP@`k`;>P09)-Bt1)Z1AKRnF|8e%WC?cBI{ z_1?;Dto^U_q~?@~d|U5cAD(Xi;fU8S^$X7*Y_`3xm0@GhogP!O-W5o#aYc? 
z9b#jgnm<_GsIq&zXZkGH)rsGi*3O(3V7Nu5@0i^2|5^7}>xg_gdx7tHQqxA=c!!G* zl+2f2Z8^F^&3~cK!65V6I<Vi7Rpm~_EONff3|YRj#F)$KQGyrvqMO3-i5`k z%+uA4jL%lS{?t+%KjZi1yfrs}-k-JVTjb$Y4=Z-0`?9buGdHfPu;29NV~W5Ap0&Ek z@yR}?S<8Df@~=)`AMg0(Yep#_3$y?8IeUKo^*MN{<-F?Dng44R`K`Xf z`Xr6%Q5D)c(?e%X&3W=AINxS-D5vzy+4*X6A~GuvDrv|+ORC@i>I;aUe0CLdvI6&-^ndKnf_bc4*vOX8Byud zTKqsUMPP$mRbGlu;>Ua6EEnXn%Ce;Y`)s&MY~`A1%v&6mA2T=ibN+wIHqd@iUv5#Z z^vxhkU$v!bFSF*bXW#AZFRNXty>7YdqsFLCzE_s(o@SWdyzDtGIx0TOT8>xwRTJOh zJNv2>o*C@ByPe^%w9T4F6aQY;jtdFB`bX!7<@Cske{7F?VocdYENm9tnfV|m{+Uq! zHr|^vi;9Y4`o(x>nx1*P{eEBN=Vz5JvuBF(MyVdF-zdrMA9HSt-+KESB1@IIDqL!d zA1Tg}Whr_jxp%KmV$K@Y^(t|p;Y;Rs+qY}EYBla-y#MyUV6FD|12=zs=J;^ByW`Kp z6VuHlJ~B@K`1@XIMAaMrt-X_PJUaf|7QLS?|_t(clEY3<=E_IoqKAF^G~PLD-&NoUmqJFwuFh1 z^@{Ggt*1f{?9tu7O2=wS=GzUQ?j-Gg^~B2N`j5gXngW%Rl~*3-`PW|V+66q1#W}J5&&PK6EuE`fn6-_+UG}vovNF3|Wx7tR&;9qcj~!OKe=DXq zZJxKo;GE>KFZV0{tbZQ*@Ya))^Tp-Km*KhRrsW9P;!@niRwo3Gqnrt2c6 zv66*juHs%(o)wq8Bh~9{U(GW7yV!2>hX;q!Mf<{M_lGz&eP1NPrL$m-+`q~{mvt*| zcZ=Rze<;>2b9a5&!hRtJ#vP5)H^gfNHvju6;%`zRA?kdYkL~fM@CEe^Lapo9&1T?g zubW!EC^+)GjnaY8+9QiyoA>%VXJxJ0CHMQNc)ZL0y1y}@^}#=P1eq*ZcJDw_X#2;{ zyOJaAf`7A3ToJNLAnK}S=Bu51H8+Af{nHG-*&Q?aSbXTwE&KP6Zd4}rR~@#MC=SW} zTeJ4)=Pae<6)`)QDs0Nvd)+mtI~)?7n%y3H{9JeS)%Rc0J9ZTvsuGY3aq3+ZdO4Qw zxclQG&#*B5x^uy<`KxdE=FXkF`!)BJ6Y(F-E7tK*?t*`Xouk1twj$&O7W1{dO6a({lFn7Sf(O3-dtnx8h0@3CpSG>1KZ?WrkYR_VH}_xbzj>~p22 zy6xX4Dk$+__HNx2r_FU+M2;Q0!NVB$G)}3$<*i%Tyq^YTe^p8}?n=!)z4h=S=lzSy zmqvfQz47TQf%TkgbySo_PEW$FGr>ZR}??C_L@<^{b}ox>>W#Es#6;mO-QNOeO6-5 zjw-3*^18-_rScA~7dE|;;r})LL(BDjU-JvO58Ie))Xw2KJx%xUj>5+U|Nc}?4Z3}J zZ~A#T_2NxxyQVH`^so8wB=yghZyNtBr(BzHv$bmKA%i91y7M>NooIQs;_2CXy&mu) zQ-ggo1Ci~y4w>D zwQ#P8-*5Nl&!07QZp%M!k21MoA8_TSw#4U;XVV|2f4i_%Tf}vHT590tn!x3+pJZ~T zY}mTbwFNwly;G>p_7l67xZe7*l_{dv=HKhNm8!M6I%La%&1x0iM zZH||#-}CLei)gRkigrEut-7)w^6zxbR_~v`Yt7lXH7U>EC#-9`H~H3L*TW5r%pd-K zzhAB6Ti(UyWFcS7Yb^fu$i&OxCB~I2+KfU%Z zH@9iu6%yr|dRToX$F%KH`;&GrG~0dcQ^v)By;Y^RH{bRz+Ie)jglzrp+ 
z=6W>Yc;fMW{F0xaT)ner(e2=7b6%!-r!HJ9=1g{PIRj3CS0*>FFMP%~qxia>hWq6= zdDd6AJxEI_(fen^#nQ6kvQ?#uMg2L649-g0ptG#+M z4r-VAzj&!*DR}(uY2m6@7N458?kKQy^bd)i^T)9=aBWWF(~6sm!Z>+eMchoDW>}Le zP_QcTj!2|LG=CIm0oZLF-gk1!ygiQHzQ1IDQFwmo z>FF9BUK1BuNXzywyV||D^zIzX057)ay}D^O^9;<+`Q>{hu9-e%XT_nLpf%a9CNCCF zah35G;hkx#^;hnjOt0I~6HYfOe@t2^#h(+;nYi%a4QbKl-s*FYIvGm?mzE?tS6WW? zU}H?rknGm(=`Ac)n`%(jHtpk%LpLp~_iDPDyilF$X_GI=D=A-lFP?p4O6`=+S<6+Q ze!XiWpLF@&=NGrPF;#8xu%7()*4Gpk_GR{u?`9+#aY(2xx1Vcp?fj_>pTwLgbBbTK zY%bo@b42W7Ld&iFnR`M_K76dSo*zHYW|f+VfQZmKzQez+u<76K+E*mo?ss&Nj|<}z zjam0sCZz}za7|x$;?NBNv*kjZ$;%5Q`*h~;f+me!n)5=;-M^$=oUqaC_o78tKRJ68 zF!;rG9G0G+Z|11>ddYjcn{Id48vgryR%3!b!{c_7@c+w}ENe<{`}Onb#!ks+O3e+b zXD8?dBQJr7bZO>1wogUMC}~0Ig9DusA==R_$Ir(eoRYtNw!+lfOID(Xb8fqKx3VSQ z`9Dk1&{8zJY>wM%wxxjz2R{2TZ{>00OVMFo`}G27e&ea?;o6?Nfs+;{FS+|MA??(M z)Qb})a+jydSQJWbEbM%6uImDmV@1lPBI6$=%X0%)TkJB??9RNO?|$a0i_M+QYEf78 z8eey6TGYHU(wd^x^5DQ0Y5U2+K?l3{Y`T4U&F%*=LV;=~ucbi=2{vUU$op==n`cHr z!TAdRsy5Bq(=EbvsA(#=d5~Ax^~|*U@%y{!xv6(AFV&iLO`}hJ+1jA`>c0nxq^$Tkt|Z={Al!XJ&f6;?22NE*Jm5 z&>>Rej%?oh0L!_}>VGxA$#oz5&0Vg=`0<+Z*}ydgcBRXYPrSVGfsyCF1t#4`ch1Px zh%(@){99X}39j0gADqT*+gOwXI?^otn7ch$J#UFmU!(~qQzG=z+ z=a%wxN$Y-ST7QVO+2H4W=Z&|n=yv|jU)z`e-=gEbne6e?-_6}xC$(NNIuRf3x5%@X zLqu;v8IO7Sk_*2cxnT+jZQ$epem$iq#(iOib3v z^}YXRwPaJxn*3aoRbq{X^0^LIcCLPLb(PJpEh|@Sn|1DQrFqsA?M3UfzJLGheCF!w ztmz%}xa&Of)r=QFQsTJ8W7IMv;@wuV()sG*hX-JAKn&zcpQ6bN<{eaPh{FdWcENnk^ z?|EmmaHq^mUr8=ZE8LrPr`s@n`lYzOUT)2uGA{At^{bti?1=qxURQl@--%%6*N^7D z|0nD^SuS^_jL-EqHj7^w?Wvc&vOb2Vxb9YO>F#rj(ocVSaebST-chxLyX%Zsgs+@Y z^eg27?~0?{PlD^DZ?d}g?B!d;Znwdp%zrUQ`fJP6mb_{b&zI>=S2LLD^{UIU;oQvS zddHMaR0P`nk1h)EU=UijJmJ?L*;gmB^Z9LVANrSe`PtSk?L@=ki+a&lGoCK!_?7uK z@JnVaqpXzMs@zq3rmGk&Qm8W1Up>d+yUgB}yT=z@-0WIe>SIxBF1E(1Cv2B@7~=GzMLs!4Qb7%=*$2bJ?NZHaoqf3ADV6zNl*pKpB? 
z$+=bSH<$I|y1FGoGW9u@3qPOSs~`3K+0zfF@Bh!8`PA@1VEXzS+b)K6>9nM?n;y7c z|K|9qx67x#cRTL3bmEK|P1o)3wiF%ZesFPL>69N4JO56xn$sO7ASWO;e?rTB^SKVc z*(Pd;2(5Tfz(6Z!}(I@t&q2Fh1h}E!pBsA&j$0zfIY}U$GbBTxBw-=O| zUYz||_+moS#tt^&4<%|}k2yAc&nQ^V!mPgG$rS0V^b`A5NNml1^yKI2ADMRpS)P3j zKi7BsTuh*Nm8XY)!*0KCGTqz$ZuFjP!;lJzd87--cNZ` z1P5qP<5rM$&L=L>cK0d8GgI%LGk(R>fBg4#$E86FSIxcl&Svqgm9c+=R{fncDOkY& z<3#%d*XD`XZ*jBSz4n^)?~Uf)+#lxDuI|h*I2hs5%voVI`PnqbhRI_7-48BtsO$ZT z`f#RwQEQW^W}HTut|9AxnOzO0*5?*@e&yIYyTw~gXpVBg!W2gt%i5#UI>NdyOi;M( zn6UIPcfyssyaw+!pM6ziI`66B>z&^$inOkkO}uT~{AyuYtHGabA9q;od9!WW(~H}4 zUAL|={nT8_apraVcKO`Srk~8A(RnxO8JBjkEcSYN$ZNrhn!kSq9v<+#8a_RHPPXCS zjm4)c7S8W@o!pi2WBSXj+1EeZ%3iN%U?AW-%j970_Pfs}Z0lfa@9=S-;CR5YrN;Q# zwKIDIFKR>?L|7Vs`u>((Z-sfDp5TR@AH8ZOebSupz=|ocwQ1^v&HUx1>zdZr*(dzI zTpPPO@$9+F79~c$Z|>gQ$rW{7bLF(eu=f^gWkP;$U)yol{j#54>I{oDtX-|o4*z>> zcq)8}dw0>PLx-Mj{4V~+W%VxAi<@07x0*QH3q=K0OygkIHQk?Y)mObM4Rufib%{-(Ly=OsG*u#&vyv=3UW!+UP_!zBLWhJ#7Ut+!@ z%18F`I##{!C3!JY@6X;}$#ST;+$=zSOWp71j<<~jSiKtd%hyF!^K_onl(?{O(xz!S zcdGe5|Fh}19W~YccGc1;qB(!(ZGL6(&F$);t3_weDT`m-6!1Ap^~yWhuHd}iOG{h5 zt!8z2`gl3M?Q-2LqLH>n+bAyLzI<(%NspYZ)amK^?hz3ZV)}78$~%jmdiBWL$Iavb z9ap93+_vI;+WC3C+j4I&s`~m0v=Knff8Lu3``{4=+LyMY$?&hyA@1kS`xcm}U)28C zX;Et!x0kor>0uw|yE6}>3nnr--q`Scj@5%M^W|Y$p&m6>GTSDVyDrGMQTCzf?ZWr3 z!+b4w{VRU4=xsj#{V-v9y?blxmJ7e`{%#sn`0U>v_rJYOOI_Cr&G>Lt>}tD4#jWf8 zA-9|E|4WnJapsG|cc}+OLcb-a*GOI0W8FVx%TwE5D*{9UudqD%8eYFxqK}7vF5R30W1wY5(uX;!P%7vakEOH2?lJ|L~+3!-bYQD%0IRs2r2IvC~>r(^7Qf$IdK+ z=b1AKIX%5fLiEF3-cDH|BiZ$qUH`%7)Zc~m+p}KfT5NGpxc4JxbJp2Q4;5cs(67GV z8guW=&Sk&e@Xyz~IX$;_{@%rQYJaQu^C(|hpL+S;=au@K9Z`P!)AnN1Ug7dEhT1Tb4TseI=N-AQ z&^dQe;_Yp@GYpg2o}HPwxZ=XD0}k2DwNi2Qe@j`|*%t>cc6)Ji^YX*`Gb5yAl#FB# zPF6FyJoV!Ro|?|Wv}QT>t2X~PJy$r%yzW(~-ZB}#)l>Lfoszix=Imdkqq3#x!jykE zu1wvrwKDQZoUhCCW2pt74u$H(l(MjXW?#X5YSFnkU-N@k)&+=I_s4v?AU5szCl#hS zJ1iS-vs+!#i>to!P5sx}oIAyScekx(jhklv?XvH-`|fw|2F&GIE&Jw}o!!fgVG=05-a6CCu8YmwKyYUSBYlW`ncYW$;VfDUG4{+5C>WiaEHl}OOVOw zKskrYakUk1XI$KPT`46A3mlOJ!oQT;~f(R$U@ 
z+s}ASTz%xkAsy*oO&diT?xwoWEGk(*euudOMZnIYia{I&M@^m|Qfm#3~_NzCb) zZM^b}!MSsc6Xt(ZFtk!H(rnxR`ACqQ*)i`k6%$0aKmE7M&g-s-_0NsdPVG5cta|O8 zRkK;C_sShMQzu&+WW-8dTD{EZ&tdmQ#VAi>%7maUNO<#QEoQH!%^nLX`y@sqD+bchb z__67IjOq2?B+KY`UzlskF44mWVx?nlS&GZ{zh&3gYIn6#{kP}cj3T$3(A_OjzvusX zYc_>bf5EoDcK6oz{mMDLe|gKMp0HA{6~-Q04wzQO9hw#^pdR)5ciu6fnkbVw7KKcQ z+juwsG+DA_$%5$Zd8Y)8Y;0th+4;6uI!QU|FFMF!`utMG?S$7cZ*7#f8Eo4p+fsO~ z=j9Wx=V4s@x*1{9FN;6E62$N7)2#LI)dih(e?O~){+jR7CVoJD|2tPNuT{IwmOjwE zy5aQ!%RN7jiWMAp>)qr!CH2|I@~~BFes5d3peE$rj~ZjY*p6U3-PqaFTniTLKWjM2 zZ%c{GoPUSw-4`0??i6CZ`f_blM!-^uZE`>I+dIDQJz?ziA@bsZ+{v%6uXp$H;pr09 zZhLsReYNg1?XWc)a(6y?$C-P2>BMIO7ZX}$6=$YwDD9g*@y4MWAkT0lKd+GNohzCy zS0yI%B>%RbXZy|7*C$=Q)FYN5%c(80WLvt?3X6}P+qkc0Qm#Zi9+2rgL^=6il0OL>#1^gHdsUH)lu2iKV>ySH++r z4|Es>dCv(={^0~V5K~csrAU)ekay>nhf8n4MCPbI2ws%<^wiXC%!?`#rp#nsG>7%& z>W}X5EthA?nw~#BJ^kaC%l@lxdwEaS>n*=m$?$>^IwAd|Ho0O#VZr|@b6;;*7-T&^xVbY8ongD|}By;h8y>!4kg%e*f;tNKk*OaA5HYWhJEp!K%xBW;S_E)%qDw zymP^#DIQ-VC}00)qm*^t z-mhVMe!W_~N)~jSqWJVVyEi?y*i-!6FJ^C5XlEPfxIBs99^xM)FCOq-pLS+Oqh;~4 z70V2(Urz1cvfg{u#E)9V_k=$k6^~z$e8BSI-Rk#yANVd6igA8$(Ov#1XxYN``1)9j z-E1#?cLvy8cx8}wM#ARP3FTMQV(sniQ})=LU-YQfDmv<1T*X6H9$BjpPNS~2XJ=<8 zzrD5f&<*yed+9Eq6BjpDea)I4yZ$Y6?(0ojDI&bSv(1jS@k;0XG|#@~b5~vIpbg`! zQ#(x@SNr|`_I6|8W4DJna;tUOqf|0ac6N0IEqZv6T|Oj?wNIXB&W8-g%WJaqbgZtY zd_5xUzk-`{eaz0Le!E{B`TKq@Gx)af+IPu|2h3yl>GsH4hnb#Vrkyl z`Y-#7+uuwLj|{P zZ+ptQPxAk-*X!458;C!fGH1@3Gf#4FZ#(+;`~CGC9O>ECOw)~D-anw~`}-;9?^M6+ z`3JriEti*M6_>4AAHRQHO^H?Mt0h0UlJ95a&llt^$$1oIEn9Ht@ZqWKyDXe`Et`BP z#jkkrUgeaGj1{-$D)miRm+0euS;@6)#nuGf3lBpqcDLyl?<{#K#KgpO#czH~3yaN{ z3(l+f^&>Vg#O^9_{QmClROOVfudYs=yo9BgWk<`&r<`X6gCu4@eyO>#*3Cpf<7`s? 
zo{w$2?C0)QPdUBxXP#No5suWfp3cs|X{@t~Gp*`Bom4OU^(AwPRw&oyWxmb}om$U` zPxCGY9c@`uvnFb*)~6>Y7yo-c?b@!=*IMg~AHJE@w=w^|U5}J0*O%-E|3x@O4j=EA z50BQ{$=zYCu5nF6Pi2O3g1%JCM$2jQrrcWY(yZP%HC+%^DrB>`nEZ(9_Ns-PZQ{TY z;o{)Z?3$n&4Lj?0pPPeA^ZKnTuDEkRPcw04aWRQt3ELz}YeNn^cRBp_KA-daN#&2G zN%879O|#yl8>9k0k;6Szchyn@6aFv>F6YlIXP!Uc{cB*eXTi^|S0@f4&g8IKr60IS z=>T8FzdxdkDNPLqCXDazFF6z-F-twcjy?ZHyOI>oHe+4h?NfI!-_7dEFz5(S{oJPC zIrG;}<+TPT{*egN16-O9mVpkgh-pi?_d&JL*LUq{SBY8a+B~fbE18}Ze`u@ISadMK zdUXiUbfpaQ4Zly0;x;cV0Jzb&Aw$BFA&FT zs4Y5ZT6;j6P3roUE4yM^7FKRMWKeD(axr1UTvrnbmg$N!c}$*Jaychv%xm~5vqb(G zUmMtauFl^(8earP>+rG(u1YbwFk|8LX|P~Z_zN>aQ!1}lqG8_F*-~d5C!K70KFdy_ zd23}KT38j&%6QQ~Z|iK(Q6rrh1-|kh+8U3Unr_t-Pt2IN*5HgRkE@A9PP!m3+rd;1 zHRfJ~0WaHkHZSa8Ykuo<@q+Fi`@Gf4)7Y9hLX9Ld&Rp>JhK1sUx*66IT^R;9&WZ5G z9{RnoeC{*QDJ4b^cP`^Uk&ruYv4Ke2GtL?34@&<68@9&9c_ZlT2A5{XmQT}i*yhAW zu$T!XbF(lKec54YT&ts2+2JQ z<-soUGHLnhIKN7l_nE>;7n2>!_$v-OJf1ZxYI?VU$@wq|aB%<2eSW|GzpQrH8i$w| znP1=E_lK>GO1;gjA!XJ2)zMy%ciBEoqYE<*Ef?ZOjVGfm#W^`TYJPJ%3LhWq%rN*b zTQH-U$AnMNvPXeOR;ftBQNqgQi9wS=Nymvqmi&Yln-?X1d2umfUybFVLx(n0d`yZ? zUK_Jh=<)IX8p4ju^uhTvsBj!V6! zhGhHCGV#>)-jea~#?FbSg=M?HIsN~&_U!j5F>{*DYdmfCR<K<~5#YuAG<6Y+YD+3Mp;w^nxu=K6Ybca_DQls4X1U zWp566>+gM1lD&HMMly~_(7vyAPI}7L<-+1^&!RmLIny9KQfh~dVePMyGjpxOx32s9 zsb%w7UvKB%O4}0`bg-pgT-yDfH(n@Mw8S|nF+*+>Qj+!WZWC7bOSrKi(QJO(lqn)5 zudn%9I<>X8CqFpQ_)31$SpJiZj-wDaPpp!6q zBpScIzOJmM#s*(x>n-i z_Omrh99@1;!(w;IJ%bB77R;KKrpR;lv1l8-9GXzGO=nWy)e;S*q;Qeem4I1eEM;IcRlxo4h|fLKDSHOXubDVWA6QFCJar{9`>;Eu8ZyWg?%O3 zTRL<4j^wQv4(o@>u;Mw~084 zNL-(^{$cOeV8L2jwbLa}xuzbx%-der&C2wLx#a2l+=(D-U;1FNYHxMP`PymI%JLy({*!>66r{k|SN=6DCZ0VDNU!x5uG#_S79a zRT$yG=lY>d-;;0Q#n?3lS9-NhcOYe8Ic=$9J(A75(q;;(s;-gZLPA0Y6(17L%(uT^ zb9#qH`$ze;$9Ki18zg3Mw}|^$mmj=2VbZK7+v(pHXH7iy@{|-mAy;*VhUQ{_IMuXl{@F#`Esf3F&<*?~)R? 
z$RuV=xl{kLWubwI|A895dl&d^!%S*^KAk?pvRLib_Wbo_0c)bRa`oGM;@GkJQ1GEi zSKB|_Ynr=bajJAwn+WXgBdX| zuCKrE>(@W?@7Wq-$@7WV8;d1Oi)Ev(>NU1kMLo`TbU&#+ZU2KS&bOjv&K;TS8c`6~ ze|OVM-gu$q51u=uBxcC%LUsAc$?Dp#4>d3{hj{BmYZSLwwViaTvbm+R|BB;rvq@ZzLugS#qpws)kEpbeq9`E;l+IO=;-_xV$ z^v>_k=A?Y9t?O6WU0wG7?#$2gZ_bR|v1`|&`2BT{o>zV4`JKA{d{Mi-$B`XN*)Qza zrhQNHX3%`SEm^{o<=S_gsmR&&NjYd!@YXEjbvGp@%;m)>9(_Ih~XWyWH~6NqDu-FihcDs;sb=UGxsVg?+t=+PFKmadBp)s49nNW#Zzu#gfjs6Selr zC7+P+a(AfW_qx%(NtO5Y_4UUanb`$|g`4+QfA2ct7re~pV88u8joMV6E6fwy(K;cI)a7-~Td4l^bk3sd2}juX*oM*?H^k`o1n} zTeX5k(eO%4?^XU}4cW+J6AY*HHGGkqvVlLgT^qeU>Bfe{M~}3$p0IdoE`1u* zVe-aGswtQK{e!m;&t6=T_p@%R`0=NCmue@@vQqiEFKj~Xl}f%jYv*oRmA%5tOF+o^ z?A3_B25pxnrt(NSzvS?E@5uQ(VB3g$D2DlgP}q9`+V!;*|yg| zM(sbAA^#wE!93pUypam;mbN6Fe0f#Mx_0+f=>^t$Wp{5XPjZXx;XSrJ+&sB#_sS22 zA1>X9o6qGXAXNJ-^^{r9!jdhh32TN)rV!{fF@F0$0TD79)+#^E12mS!a7}Exe0|Zz zwBM;Vze8%jaG2zTc{rb)+;RBS=lDa{S4yordY|Q|J1&(h{o5=L#81Si~U7W%d@$CLS|k?;*bAnE2 z3r;+;MkR6PgrdM{LM<-Y6HcvEO4g9AK#io359ui>3ikH$e0+SD!jBxV70Bf1_dGhM zSN-DF)Y6`JH!mFUH;5}wF$uqPCueB@0jkZb7wLm%G=LFV-(lFRABuk(zXAlVR%BM3!IGk9r$@zr0@~6CKyV zdF+?>|I67ddE3@iY(A=OAzUhGn_Yg{lD|c$#YI2C=SGL&6h1T51l`QWE3|TPsH(oc zzl;BydwZjyNnmH?&x+iWbw94JY05k=8WXzqijzBE`Q-PBFK3!s`8MkCmxLJZy2!ER zbLQIMXjP6*KVhx*Q#}hyE+D&GHuJ`YMCKR!uUrYSu(aj+fBMWBktLvmAufdS(zgh`W{mMl@xiP*sKe*gb{RiZMovJW3UQmPH*xzXHZ zQuE=er;F;6$KBg^dlYe*Jh;GfEwswdN@&xhmd{Vl9;z=L14SO01_dTg0hVi}giv$Xj1)G-&TOGW5@>qlagz)f^!)Yl|TXvXuhrf1yICI6icYbI6nwz^- z&lqXxr7eqaRMn6QexUGFvS*>l7p_}IZBv<@Z%%4^KChbZ^|iIa@vCZoeQDgh*|@2x z$wgvWOjnnQ&+kVXNt2B(Zkwonj5lDJ@sf2jE^+_=ogMAMq!D|%eQw&pYlql^`Nbk8 zg_Zr~5PK$;T2`$5(CnOR{E-c8%q3;pu9Vhztv-J$>Q|Q`QeZ}3GAMZ9Fww5+c%ST! 
zy1!QUDxb^pNEkSjWiH8_{l=z9{5Jnx*92$&J%%e*ewn*?f}BRq=cG5HCWW7mNB>TL zW&5fs^i{(rhb znR;&{Q}u}xXAfAj+Y2avFJ9Tpw(jc(_Y?OgrG1LJd@)=x0Tl$Qf_!~cDc1}5S0{4)~IcjIDD!RR3z;cSQopSEj~Uz;E793&7K`{y9*yP zZ8rUT_Wu3&Us|3o99^YWbWgYc7yf0-ySaKBHtGDXi94h$dey|(oZIu}$q$>pJH*w= z1r-Htd~u{`$fp#TQ#QsO21y@M52BxJE`oYnsUl8P%owlmFR@sPW3z*Nd4u3A?7(xGa>s zyqMpwizT4*XJN*R7y9e&*~!$&{C473HnNpEcg@e^oSw(L)D9;>slb0ig&t+a<__Acas*m29Yi(yAK7LSoZ&vy7*}u6C zm20w0@#xw)bLWqli=589s834^l1tsR%`3n}joJA;yJ1@uQ}_zWtO-uE4+E@+SE{xzqKe%%b9hf|zcUip~3W9?7Qban);2cu6;z1hm(< zES$^mQd=ebaUMe&gKLJwCZ3h|`J?mS3B~TZ6>{*!G39{68>Q~qh6wn1St`jbaZAda z&gv>xZS+a!&f7buRNGtjNk|EqFa3U0!YkhK*oLBn6%m&{oYvnD8dh1l?$XQ4%O5^? z(2#j~*-B66x;ne6Z*Mw%XPX_od|BAJo$u(T)YA{1e?L)Jcxj0z=pLl7)$uEz0E=SlCo7q>21lk$+k^7jj+o-7lY>yPsnuB=wB5&g5bjQQe5 zG1>S%cYpB;ZY)%ODCXxnxB86L!sO{y;pwSs%}ZXjAti@X(@ZYZ3g55&F8l8O{{B06 zV!mc&yv}#vJ#_f6Vcs1J(@cx{e>P$3<8<>bF4P;cI;wR= zD|p@FJL<_CseepQ?}U56OkvxXN>kSIL+{s} zO)mw0{QYIWVdIu#!FB!#2mgt7cyv1ERPDLQw&Bgp?;pR(tHwY5_xXeAp^KB|=!wfl z^k(_RRqXuA;j;8t#~fk%xI2xFy;X-UUyObIQI_Af`_;L7A4*p)E0tb%g2RBRr$I*0 zpxR^pL#rwO{{BvWer~SSmWn-Dg_k}&pI`sz#6)E_W!|H@ye3VDZYtW>Y?ZCKBcpP5 z;UaUr_v?jkP5ZLH<%~+su20PGRPG8-&zxv_V8+t?x$}O!eK$|3f5!DhD?5vA5r3`C zZ?Bqh=T+&B2^{|H8G^qPPA`7oDsn-1c40<>6iDV%OV;_~sm&9E8BEd5V<~--8bc%oXf@$9qfJN{@5?-009 z+4*Dw+GDa_e~7rn?5)fjd9zYt|C@$gH$QdGefa1=rL@nzhqh%w1{fQjJ>qQ;BmWoL{b9z`~R2EyXkUW8kkMyL--=OB%Dp>=wPa zaYTCk7u_8h-QIyLQxsHYU3=qor&NZ^zHd6yyymT~Q+HXlCeNBZ^Wf!s&b8jpADxqb zuveM8g*l~iy8pZx+LO{Jo+5jdwN(Y#e4onuiW6s?+ZijaBWZLS-N;7IG}EA&)4r0({P-;b?ep>4?GmJrno!m zE!DZ?kgOx)>1cnxVxOqgkFbfSKcpJ+sd99#Y*_SqPw=M81&Ys%zTLaszNfu%#;vNR z4PFWX!zolla^ssitc`xzY!xCY9peWS%v~Wq#-l|e?Ps?N?Tb0kl zWUgP8KWEQ<@aV^n$W4uHOLpW%Tzs)D#a#Fu$9xtpVOxnZ5#i`PU2ngaJ-ol${o=9J zqLVgFy09V3_RX$flR3fLmrJZOi#racp4+fykIX~qM@es6 zvij^Q_TH3`;j&lj^C|jNBrsjFKRwGn^#b?N3VE;i;$s_%5;USODah76{=R@W)8q7+ zgGWT$uQV4HZaMbJ+*-Q6fUA;8Tk0jZtHix(0o_SXqI###)hN!9JsFgknWuRvcX`V4 zM^~5WKRBKpee`I2kxpM2Uz5bt&{;cvCPy?gKHA6cEOM{?QeX;?HhbG9Ro)Vtz)M$; 
zcv;O0$X!@HQFF%jmlK=#a$Yo=vzboacUa4Te@(^`{YCpE=JnaMuV4R1gYARU;)g%J zy-NFQ z{rd7!_}WKb7reJyQ{nN#URf0sFabX*l(e9uSL=IVt5D041GkSoxa)av!n!~EW>&C&wmu`kxh8B*Y!Jbw_swedg`hl^+8>M5aH+7|P#(bWo{v2{vq#;Vfk zzl*0dEX?tA_UyK<`B9K_@6U_u6?@DZ^p+^v|5uFrx#WS->O)T<&2GgVct+xcVa2jc~+4~GT{erf%m|3dL$e?S!Hs(Xwgo1|uE z?y+{*(z5YLmjCrvZM6&Ak1kDComy8~{qW>0#|=i?Me3KWm=yYazliyozU*(4e%$?c zL78ds0x4N9S+0kkcJfdDyi9z)Y%;Tk>lT$%_3mv)xn)25*qiwOxpm{^(bLKDUXjt3 z+d`Szx)gaAE?lVO{KWqMALZFwueF-2VB7n$f3-!0;-V8WTE(j}RXI9y%wjZ|CrY=a zEc7_vaB)x9{%#@w?7(k-4?Ebx-amn-ep)&dyJd>D#S#D7UjfVB@cszsyRD zPc7W|_?!HT_t&|fJPr(6s3ELmT(bJr+a&@zl}UPUx@A|NvHG-d!xV$efMb4>X6sCE znU&+|oH)Pj&!3t?+iz*N4eKBF4bB7`p)T3FB2lC?OO{;M~U;q@7rR@u6sStl%xvrJJYEw5`X*hZ;c`x$#Tfj`i9<8EQ_rlTpx?I(#cXO>O{(t6<&L80_f!*)_ zsn+?&znuJS|MJ_-DN(xvud?lss%Q4SE6pwR;o-^DR}+IG-gWo7D=8WaO74{8D9CS7 zT^`Q)+VA0|d)5MaX6Fv^C&wL^Y?`z3o8nTr_3=mj=a$XsTYci@w4wq5PsP(m_dNE# zaN#Qd`Jeovcion`K6H5!`DN|rEgRZostdTjuDa6NzBkb+D|MGhKd(3ECY|U@8_M6u zfo7Tv4GozhLfuZR4qtyN?eEeFY_nYKX2yS(f3#KgxBTqj^<}0Px4N2Egnf`UnyR3( z>1M%Fo)*I^`K=mNi>4LxKFpq&HnZbR$bqZdGLBuVKDqEi;RmtIbte;zZPjHN-~H=2 z*>U*O=Io277EAWI&l5YYZZ4eH9qqo_`j>*u)&uz~`-*javVLkUlbP}NhQjU2?RrIf z7Tni4wO{mSalo-l+@9hEs*lX{j*Gr{q4@RJqR#5%H~W5@`?<~)O=(#-x8JA#*s;_1 zMHMsO%l%<^xl{5*@N)K7;VZjE|KGjgrP5c_9k7M#uAcXMMlIFW$FG}NTz3V0ySO~7 zY@&#t_hY5*>8BLK=5Gv;Rz0yP$J4oA&K9)JU`xhDrf1Kd{aV);c-vEzchy1HBTEyn zO;`7l(T?RXdH;0Viia;VuI!u={@hDo(x*dix1Sre-BJpzvJFn~h<{gdQao0`?YPF1 z#UJlIWK9a_wmR!plg6}{@8ipvS1+kAnILD!JBvS>TYAsV{9V_Smn~kQzN%c{r`d@l zr;BQ_StUXX4>G_08YaIzV8UmSWrC!{6r5nA?WA6OG+tOVo z7Zl`=EKB^>HmQ^OZ|&nRc~YzgR!lYxdhR74r0jb);jU5JrG+~BJo-oD9JBnatbVYp zQg{5v7XEl$_haVJg%>Ov~7|E}%v5z9^}hCdZ|(!6{!YSMRuc>UGQS@M%6 zYs-qB{8-_(ZAs$YU8My@MNaSU?M+Pntic%M`Y}_(lc)Ier;kU!owg`9-sYqEw<3nU zB5Y!Re!Ixjptw^}r)E68HqCI#J|$6BXDdI&`wUi63nrZVI{ja}WUkub>nmp&yq)}? 
zX`&F<<2iRemDUthiOMZ;ZE~^+pZ{NI>zwSQhIoC!wF|ZKwuL@SeXlsF@*;P6#NU4k zoBws5Fuv3{J(9op*4}1TbvxbSkbmpG3Qm&gIFPqXWhM9@Z{3Gp5;;3 z9Tx0g{wV+K8jVx876`3LPVZUR*Dy6;uAvUssm>1LLx~d)#d~W%(ed1=zD#b8a$H7o zbpMgo4X;m}IUBP4gWk%wllM6uI=N+&xRQK|@2{uN_nF(eSzsCnHjvo&wR(&Wj`504-1-Q3Fk^!tr3 z_hn7KR2N13y+3un%T&Lsw-))&o%iF}DSxJHudr>EN5s_{uf17#>g`pPe&e&wJ-$*I zVuqcPCr+F?dS|`qeUBgCI-Y&_TGF(%RdRi7%VF4B0GXWbP0p7U7X1#6h9414aslT}Gk?G$-9b+19@gbmC-j#6Cw?;64F43jQ47>7| ztPR%=DgJWoNUiaa`x=X!OV6KL?d$${{%`gRORL;tymz!Fx2{{L|3@`m>J{_fs^s5q z?_Lr=%3*7eIe}r-N>AswIy=34ea-Qv;wnp1A2+Y}%QF2nE3WU+k43X)?^O_-B@?^l z)fI=S5s}{Plb~a-?WcC}pZwm;^YV#tK-5l|>QI-xEPu)mN?(nxdRF`}z)Miv?Af(L z^-qdUrEV$BUHoo!fmDHdw)@1*O@Aa;$o`a{(7NO`Kg=%dRBNAEfMRWN@`T=3{Mz`OAGLZaZ>r(bzKimdN%cGTj!v zlP9KsO#8giwS)U=Z28A}rBAIg{+dQfUaJc?m~3~-@Y?agf5p=f=ag?tI(nW<^B(=} zZtvz4rWBX?i78Lz&Y^?<^H$_Oy2fG07pHWjB;)kFUA`gbZ@sy^K)v9W*=4myiTjX%51%I@5T?OR)Ra+I^4Ijd>D^dYyl)XN!0&`FL1zc&;moLJ&` z$m;Y&6Os9Tk(#Tv&fV3#RyE9*Pw48)H+m{YtXucY;c-%lxR|k8ot^&{2yfk*jf zf6x3j<0{)S{$pDlOZnN~SP3!thg+9qJksqmKc(d*Ae8$oZeFQ`^UD`V^I%{7MARhJbbEAaWWg{>zIl0W!(EMd#(As_WyQb%5j~w z&v1(+)9am`Cwf0`fAqn{SpH$lN!MzYh{){z&`eXLS(cPb3W;;IjWRX(PQT2)7_s^9 zCYCK*@~!e57#0Z|eRzG)`jOfBoBC{Bi$yjyEJ=N`ATT%l=#rDy>aP8|9@4bvVtv`N z%##vc$7h135x^lowbNt@%jIp&_eH4SX_pg-+|#tzV|5Att{kcs93`u6wg&p+(6PX~WvLhCi+s zweH@mduek~;fF)aoE)#OU0Jb#!3gIimzEX1iZaQn zw&qm7&=oLcLB^9lW7t#=IMB{4aWt+Fc_k?}x$VajpFfqK>^2pD5)_-fYUe`1tblK) zo1e=+cvxTi`}&%uPUl&Pw-;Lf_+26~>-WyYI}K_|d%vCkf6r+(o7@7cDUVFG6lDuN zXJ*7i*NHc5+%#=pBrGQc9I^w4@Jy2z{Bo`V5i@2VK6+K|x8I$^_bxBoAv$eIeLGuD zW^!udnmIP5{qp;6@onCxvt#wVf7?E-J?G(Z_E`1nzJV(E^ zy*7#2xtcb1r8&FlM6SbQP7WzCweO=~@rk*3(;R%$n!Ou4%H zN0t96k@!DPIy}V~cdgOZvWt~zKCv@ELsO6~bf&=yPP0E{p}AKVJf19G{qyndz1KIs z&)K0M_9foQYnp}A$D*ymm3k+#1SRM5RZDvp?wlbc(mUsR>#<`JUh#^c;UaKeGR~a9 zFmK+Hg=^F&wK_fB;LxeTAK!K0Uon5n`a|CimoMetCR4TI*}Dezjjkszyt&x8z(#7- zrl*hh{iypc>bNNCTKVyogTAbM+S}t?ysG$)eamEdrLJk}eMkDzmGi!HS5E#_R=iPE zrH5dUs1;wvs~OaqN4fl3JD$!wetBpXd$SV>t)Ef7y~ 
z3zJ*duJ&MgeV(GJvitQsk-+~mznOkK{_*##jo-4Oh0C^0Ysv`VT=BN+I=jiiWB1oR zIP}({r)n4DuI;N7b)<~1%ezmu3N_5kFm{xCA+%-151t>_S1Hw)cdTK(-|OBi6`GJ7 z>tM%md(WTt4VyQH*iYu@G_18=qQkQP!;%Hz7pB2;qmPtIgF5r21+QP6JpSl#{yB~$ z$Lnp&)1PtGcb^K;_;~G>?7>%>`!8%>c;e#6ukzQAykEwXug7sr`ZT|iwl)7-<~<*x z_A^YJk$tlN`GrOEtnW!&ahcG+->+54NKy8$^|iL{#k?9R-~XJA5Zt;*NxsA+IXRWD z`nLP@g%gF>PEBRBGz@ttKCeUtUQ(S|;>c9NrNX=PY}i*;UwvaG!4e&(zTT8i%k_)6 z%xz0Fo%Xow6jAt)vRdlL^PCw{eXZ-a96qO9?i$CdRq}J@_Sv?_^lcUDLYhcriAC~)BNxPb0yZ`Hq++itM^)*vn=_@SjF;d(W=dBGyGFp zHg)|9Ef>4ix8;n?!<5xtk>~VnURqB^39j%)kLGn66DZP3Rm6<6_(KJKWA>LbC|p8QHD$CiAx)6KC^y3Ke^qsdGcOUu|Mk% zu(xy^+3bJy_?NBjf|F-xdK_MJaqYt=Iqim@zjI}`Yh6;cP?6g&&+E9~uk^x-Y1&0a zpFcjfH+THLt*>U2-5Q;3YgU=qI;F-rBX|8>?}n{YSzm>gYcjRY=}+g`nC2t3 zWbq0X9(cxEePoHFR#wVIkFzI41I5$Vxi5}%)}Qh887KGgj}JUlmWTW7=UKVq<)wK# zk}i)zR9Ecm|8UWwXr;oJ={lb4zh(D)H=h@B{ielUX};K=1IzyRWyQU{9TCG{|E6fk zhPEvqzaRP0=sRJ?tk(0}W1HH$W#eAG)$fbasJwBP{iIT*gzFK3%7q_dl$72|SFfJ$ zc%+#t()-2z%$YkkY~7}DHE?R*l}VS29=u9feEQ4RRjXUX=fw#dCn~EZoLKU3Kkvke z6B{c(KRbBqmQ?PoEsp!^{+i^8s_{xGUW#z!bXrs>^fbm%oToD*B5H=B{(D8g`2Mz? 
zJ0)Xea$GbrQet-PaDVbW`TMC#x2dl`K4EsUv<+$fV|U`w5-r}E)d?=&tA3=NFHKyi zR6pVRZ}tZtjU!ydT~D7Lhvu}^hgaVzpQg7d zCayT~(g*oZ)@5%Ry1Th|mA&oK3|^M-^V3tQ>n)zmZXG6J0ZkJ!=D1jf8<@2{>3NZm zt?BbFM8inQ%FsE&sw3k<_@}oYQUtGgZlCndu8`mN^{$5>j&>BewaKsNWX#qso_H%Z zG^@gbYxR+()~qQ{UYhM|s>!)m?j4(c?DN_QbEfI$?Nw#k_+aX`NBQs9TI>DWzc63a zr+%O4q2!>ARku^GtbeDpsL*KhXM@VCiGk1WPSm}yqAc{`^Un%;ynJP0N$;OLw?8sd z+AygbfAiJL^XMz89?JB|IWhe_yPd^)vZ)(!>9L-7!ib^KxRD z>#ok(_;3H2)pw&pkFSv9{qVf=_o2jTr{;(=Cnn~p)h$ygDACTF&$qSEUdz>m>FVl- zN;0{f9Djd&_uZ2Bg3&+R`r%}!-_yUz%l^Dr9CF-g%KsJ);kU6$hEJY5iGQ&a>H6g9 zby{qqy~s45)-~o=_H9_&7Ic2{=XI}soH%<{P;jDWR+HabE_u(BiC?vKGA(LN09_e;!tacL>{WH#Y7G z_;55YaSs*aG2UE!bo$a$Uwpl0bhMO*RyWSyIaAW^&o`5Knc?0ViAwV3hlDm}emAwb zzPQn1+QR9)dnHx`$nH{aQPK`+X~}c%HGe4jev;TcX=zc$QwO4EtPm^TwX}Wzo!x@_ zZq!Yfs<0M2-n970`xh4J*SOajRy!ZOYZ~%w-xW8Di+MNeqh(|T7e;>13oo20 zC+ip(`7KS|Q7^AeHR*=#+-A}X0Jxbps6QmZU6sN__=PEh# zpw9&5(3KmOG^$C+$@LXJKDN^H@XycBLEAcWZftNoGk*eJTcf*hQp63%xIXlr&Yv}jBDSWvvp(Iv(ztID#I{lZMU z*33!8N>f1#iT?ZRPu|{UCTza=U$Gj`TdC^K3~sKicPmv~Y8}Y-6T= zWe5HLk50bGsnq3RIJvaXso8(G+WE(oH8sm;drmw2X5p!0Nt2F?+AOiiys=DtiO-q4U$WWtvGv0Xtp$S9ysu5&#(3}0{FH;?{rl3E?-l6q z-&Z>&YmalV{Kq**rmQ)|WPARy2gjQzs{|9o}y?eBx{mdmr!IKL#cz?~T;kfpmN9>|# zt<@EwWv{IHCw;uVQK0afzsj@E@%o(U5n5BeCp~|-`_V0Nqh|BZ#fOjnUu3mpJH(uH$S}U)baJ(kN5mvWcH#wKcH z7tKRgo963x{1Fp*<;jwMOWd9HpyaK!axpS%3LkliTzkuN?8rf1*B#5(_O^OFb3M1o z)ON1HwtVG_`hv2%HqBE~)mSrSjzDOuj`6CL_$i0(T)HFkeNm8$ZRkRt4fXTurUb?Q z@{-uz&gCtok@M+Dr?7{QPs{Gz)@pupG@Nw!Wh@r(?OgJLGxWsL!o+9yBpKZr1QS>! 
z7~LGASxy~Y?s)rPp)E|PL2v>;laJJs&0;n}FfoT~90r*azJB`fKmw-qr^10Hj&|7x zl@mecfQ(D&Wthwssn5^f2Qiyyg8~+MIA>nZKF$8_leDL2_#|~z)x=X%G#}|rkTT6m zINBvzP^7hkCF9|tRt1~4Z{B?Pcw8QI+{mNvpiOkQwq_sQujzRGKqK?%{2;ZuqLh>+ zJLa02nTd&si79d3d~tE{L5r;Y0yl5otoZly`NYYSnY~`QZNI(m=d;-hTJrPv|J`=L zNI`)Cbmg&*uCC+P<^J>g9z00cv1?b-&reTRgsq)b@nBc!>kXx^!#0X-PCM)L{M_8F z@*b=!Jhw9k_etyP>nl#~S@^E|*52xJ3oENbckaZz*{&PCZG%mA!^T4s+BU7beyvtJ zbdQ*7tmnRzlR_aYgIL$^`?bpB?%X+Zddlxrx~ts!_3!ulqwn=%b~NzY{}GsNo-cOD z#jC4>Iki~Yreed7?OhomHdngNX{f2W39u;zWM*b2{mZ_#=HUAM|4v!F+VG>KSa)Ke z67PAt-)kx!*8lx_y~)Am^BLnG51RQ|jw!qM9pShC(;#i0m+R#p5iypHC(` z)y2ldT+xlxpT?Wis>qufyMFCsyHy*mcHh_YTDg1w?|0S{XHJgm_;U5narycOpu4+% ze|vjzcX@sztLCTt{eQz!KG{@$Qh9cEwtAiaslBng%U)H9itPwe`*ut^AGC<*(W|m! zf#0vHe{u+8S7KSe|KF}1Tl)6>;5z;3#>QmB%u6aJ`S)Tb@N80(u`YYF@pooi;*+nQKba0xOfu`TJ)=q+Czw)V*U7utRMUcJaq(Ob4Zc;Wh-Ur9^$hi`LQmY@{5 zyX>vg^>wj@udZkw3q0-X5azh)(w+VF@e|sn$Ja$hhx*2uKbC8ytbK85Y4?xnR1KFUFDeXFRaxU}zlz%ZdZBzQpubh! zn>kYQg;?ON=8`8jmibCgdv3I0jlncF=lJz&kN=xoE1c`eT>JH6z1AhrNePA1{NpQ1 z#A0Wd+_-(a`Cr^jE?M6_9}aOVcpnb-w{5+2Li};Ww{;4ef3h5l$_2{TX);+G2#HHRsj~_o~tpwfXJ>m5vyJZpcxIe5hSaR^f_3Z3>d8bwf z&M{fhCu<_>zv4{kk_9j9jgALv*7Yp3aVe26%*$J+5aQ_QSn=_wxGQ_l;&&FSqSnOi zjfz-(VwS?wibWp`+x{&MbUU4TdK#ybE8l|1$jC$c+xE@iv_BT`-*m}d4|zuyua_3n zz9yZF2$Q&!@Z$B>ulK7yc_ybk=BSRwfr}ZVbD)O4R8r__@+`jVL=dZV1#rH1JK6!G>ziZL?ib_gO zEgL7E40mNXF{|Lk1;vk|9?zGGu(`K-FllO7F|Wx{oSuGej$mIuuat?v=jH!3Qnp5J zPCHaQxA4lE4eR6g_emP3aiseG7xY|EDG{aW$x^>Lzt zT3@F6K0Fwr@nq5>b&Zg9Mr|=->byM*pZyKr!S>KqrSZuOJFAoSZ5%mCNlK@SB?H1V zE-kn{?cuE+2IT{w<%bN)3%Hqlq)HsSUmGz&1*b|ds_{;(_*nT0D$vuQ$6=6pfmL_i zX^5T!93e?8GfYAr9euvi7^ZC(sIm~>vO^DMazLkG!li&t@%hWo!L+?-VwlVpd*RV8 zn8^;RZVjH!s&O^*tYF%RbWn)JC4)cDKT5nX7P6JvdWp52EBgA?t8$#2FGb7hcKP&d z)aVj>e6V7Qn*AR0Dcj9$Z03{%wpU$WJ7Gy;!+8&zvr8OvxNdnE8fsSMPw2WbVWY;b zSv~BJ183EIz87%sUc#jX&u@3>@cylJTckdjO^x^HHvw&@r{tJxdQ#u>ttcnROdlze)84ILGG9EmW!fvX{hG~huYhJzMQtZlZKvQajniDGyC$W! 
z>he})ac?sD@>!}ej?qWz#-#((*tVHutoN8x$)47gkF`ZSZW4ONxKM zar1aFOO^S<$DC@sH8uy23*XWfQsecJnqiVLZ8}$_&Bf!z7FFh)S8%(so5`DbGMi}4 z8jeUotw_OFE7`te9ouwh)1s8f#7hgN-w>Da|ME?u&(8Per31%>vDxvl=Xk-F)JqCs z^P$0TR_jcGX+&5=*c2V@oXiWs;uDrWliwar11h z!edSGlTzQB-jmPC3^3|Uy7b}xtG|E0-?u1uz;Nl(C4-a`0wu4mtUOcRqrm&l`Qm!X z$!ttrP)7y}dQB5mo8qND#Yd`ZZ9-9zk=@TH!a6ZKCU`dU&6?uL42tYqoIH@=UV3O! z+f3v1r2qf^78DmBUKhLjN_NSI2acfI?D*yFM8sObi^+fK#_f^#`}@25Vz*wCNp17q zIbWQw%gdx5*0P3U>Kcx%)pwJY4xdEmAqIGXno=1k;{3`h6^m^{S7h`Ca=EayH1MrUV|ts zVJ$eN>hD}pVR5Widbt|!^YinMzq-15W8z^p!$)6VU+?~}o_%=x{krbAx3@3Oyu9qt zDTAaV9KF-n)OdCNsOAdQX&Q7U2`&A(^nko{fASKRyMc+iyjx_BdLEuZ!TYWLN4IY^(IYo-Ny%~iL1_j92q8efB&)rE@Dd>`_#kMMz!`zn|E1?+s(Tm z%CgDi%L*$A=IAZgw`R7kIj`U5e)?3z2k%WfiqHQZ+_We~D0E@JI($8`b6lKU)z??5 zGK#h~HU>F2HYoK?*tO^EOp_1YjwhSepN^XzH65D3JSPgJfZ}##@bX9KjdRV$$y%Ty zx~n}VYx@?~?J;(tr3P#pmpVEuYdO2_?Jcjh$0I&?PtxVRl4Tvwzu2X{M*`L zV)N2UqB;80N0!NKPdWGfvyT5h?N8E!zXzwWy?x@wx#`dn$K6|N)`(4Pdl$-<c#Ep z&h=DRLg*qp3ybAOrinKNfj)!IC@Wq_t!Url>ZhV+^y8acVG?(eV8 z0#7CGDMt&xytw$|-|zPyFZ0&DPfW!0J=g@f8P(MJ7JbJKMGdVrF$vrzl==DJh4x@VUI+7jp^duJ4`nl zMax7$%14u>c8QURk%61moS0?8BWEMAe$S^a1>WpyYZ}+D*Z=kHZTJ1U-)E;7#jRSZ zXnsrF@?qVE?49vfHg4VExc-*^BKNhYo(w&dZPo0}iGoho~K>mlgM z3lEQu9lb^8=2$M9@-fKMIkWl4p5N+E?d*d28(`(eDQ>uCp6c{2*jj*tQ<~Pi-u$AX z$&;D8-m|(Pp35~b(MEz_b-zk!t+~g_^(>JSRWoOpWCqRr$0*_%c(2uyS=%YB<(B6{ zA(4e;#)dw+$C~ZUrmV3}@sv98b0U|k?}hrmzrTyyPu6$doOU*)(8lqxq*bzbQO=`; z%RgKu+8#JA+!nab>uqa1mygEjTf6tzgUWylYwmvCB~|_8#KcE#UeVFfA6w3x@#&E? 
zb`w*aZZhxW>2HcEO04VmJm>+f8+3bk^qBI+YrChWg=huv!sEIN7S~ZyAr0T&+!U_= z^O60`*|SI2zsP!hZ*O(-`FXaW0Ge-FdX(`FhwqVNFYkVMw(sTCTeq-=F{r~)QL*F1 zS%=i)eX<`uemwZ+=VxEd$4l*A?>*1|@YCFiHI>#J<*ItT}hNmJYL@JsQG~sLIwdeNp56@SBpM34bwGXSGHRZ4S(<3}F{YtFu zdwY?$d!i2?y5lteo#@pzm^-+n;nA&K7<`bD3MKVJJc({)Y2Y( z)^s#5H`l&=D_~+REO}gU|MxW)QBHOA_aAR!)6Hz8Z|cSm~B?ySovFDbJC*tyYpUz{`PF{bJUv)k7dgwc+urE$3pPytEWuS?S%bE4_aG2lqVo?)uZ`!920O zD-52HZ8ojhutp;?EX7xfjYmS^-E55*uU~UDS@(U|++i|jkCbYWX-~~2yLow!f8PlB zw`;rn?fmz8pMFc`-fLPP{PmO6w!nF(q2*y<%c3cM(5BcXHQv9!z6ysY@7^g|b1=Bg zW0zF*^>wk!rhGKg*}Nq%U){Nn0dU!M1I?>Fw^oY^^}w&Y^+{{L%UD@DwTK9wZb zCcy$K*1W=6+9ojO{!W1VJm>yCS?}q3uGiPaw)!7#WM=>H?RI|i8r~v}luV9pzoWn7 z=em3kyK4CA<)r+rF;!{2?^IevBtLmGo>#oEx>aUb;snDD&}<{1cwv+9G|_3Is;9fw zcrr6qe}3jW$D+`w_O$9xixW!)GCRCHj#t0#d)%33SFy5uu~7T8xzl?!+0*R5ZJ*9O z&pms+`%l>?M*6kK4nl*#t~&0%86vH8%_}G=Y1z7U>wy{TPWgFyP7M9x;^ZZ^tgepb z9N)DF!;Qr&t5UAdwf$lIWd8Agae;>nV9ogH3@f*0$x0j89CV2ha1>l6Ozbgn7W>SakoLmjAz6G;_yvnJ$07^{XaaV}mAlUQaHQuScPZZ8LxTdcFSYalxXV zc?*ut-}p{qQ_-glf6iX&R}Y!}Wd3jVkFVF>?|L&cu($2-(OaCocfHD%Tv^ic<|I)FVx4vZhUk@8Mf3V_l}S zFAKV4$htLfQ$5@dDc2^{r^C~Mzy=XZ$A#0T2yUwW{x0M4vR=if;X(0Qmo_{(Uc949 zpV8QTtJ_{FJ*nSkPM<%#AEc}EhiuAlzF=l3j$YoWhwgqF$%uXwi2B!e+WH}{D_Xm2vqjRB9@%oAAQMl#2S zY%O@`wCD3V>jiHng#7yQviW}f|GN!JYwE+bE*02ou@>*QQ?m5 zZM|KkHWhy|$a~2cnELojUX-Qts$bSJbu#vG|F<4l`DN{A6RVKy<{Lc~S5^lt-?KdW z+2^SnZu+t+Bp=j2x#^qKd;V=E8Z%u^&N8w2{bn=hr~6t5?Unihb$xCCW;=Qm4 zeoMXhyl-+*z2~P)g%-Ujk%rL9^5$eV(H&EGC&j(djgt6o^>X{YDsTNu6DCgW<~+^+ z%2z7n^B$pc+4=D|B9^iA=NfTcmEH63>4QkSNpXjQXPRtK3fZJ&vQ%Lsyd0lb{mybt z^me}pNsX*SH*ZQhIy%ml3Mib%}ut;l7E-I3;4b@`z1s37wgaKkM0naU$k-kzkjwD zE-mNG+7;3~xp(ogUonmFDHmm?H2#yF4#%B**Fg?Cu&}b~ver;iR=&8SF!`b|=%j=F zSK_)Sx7q*yBkbNMVn`yyuV-fOvESDuLX|MPXpllAJg-}dawg^t8*+Gdg=bjq=rt#gV0 z(y5bReQfSU!d&wH8C7AP_4=JA0v9EBSIl1__u_r0Ih)zV<*$Wq-|^$QC^0GW*5RJo zw`S0)olQ7M*9zLG(U@hD@hP8!oBQyOkB_g~@s^x{Z{}@GJltmcjALc%nboVOxJbR| zU!}q293>sX?|a(w_!j5lQpsg2PA_pxKk+5+fPU+u3x;w%?UUjsRcSNXN@+QUiTFyL 
z+GOH0*Gg2~Z;k+8-@AA3W>^$1;@G%EXY#5E-np`J-qF7=c^v)~d(c8oMs-$c)~nu) zwOoC{JdukoCmX*1p&nIkx%6tqji?)-0}a;Rei0PNIXiaxPmh(SL8Aw2{vQ7QSkS7c z71DRt^oqZ|ZRtd|toR$ZHvBp3S5Rp3M)bPn&z&>&)GyoS9C}^ZzWvJ`ADi0+8yxiy zCOu4dJrO_oR;Mzwm3ncBSnWlC%6*KYD?ex=8in49sacVjJYKIlLtCh_d_JKmg! zkDb2%(q^JtsPutzm&BB%_?i|^Z7$gIWz+Wq3#w{M-gQH>=UnqWEo{*2={mVBzV4^% z57jB*n%yQBW<0B_75ct7=;~qP5EEC=&u8mr^yhA`JHCHm`1I_(nAbB+Y~OrQ{A1|{{;2bdD_o_yv-ezj!g{52C2zF%+rChk4bc%_GETs4 zkT6VYIdM`zK!Aa}+x@2KNyZ>PSK-VBhGO4x7tEjT|At4LJ$o_#{1bESD&Pw*m?B>=Lj4~(xedC_e%X}tx$~CtW@>a=vJ7r+-&kx?p@2Q3;9|3Ke1qAp;KC(*qyhl7ERln-&qT7Hm=y5;E{0v?#da)>3pF2 ztc_RNOCa#E>D6f_F`S*pm)p+`=Rb1%tepJ5tUElts%NVX?Z3>!=4Skz!6wq!L$(pt zWIV{?+}#JuTWY)q4<3B9jx#?$|Hcg)tLXciQoF?#W&Y8+HHl5?)`rLZo zo7MWubd^N?g8#OPYPNjK7oSr+a5vs1_KeM@fKQv_GfZUC7wJEEdLVJb=|a7&``>Oa zQ$BE3{%h=OuexP-tRGBwx+t0i8C39HnLkBAdy^V($!1m66s7%bhwONBZ*MbwYBlYb zPOOKsU!L8yw>-ygN*lWVsxB@F-jekoFz@qDxq5Ex^Wt~DX>Q$dI}CI%%OA;4{Li*j zv)|*J_v7U+vl+8zK6JPdcyX`YM9HAzsjt3y3Dg899jXA2TF*VA=?Lm@nB3J}c-Kf` z{WQHLj;39;&kuckJMXnC+tyEO)gNS}r|xOYx@a7G*tb$XY)BQ{4)vR4?1Vgo|!mlZrACJ z;TJD$o_X!3B99@P^Y?`;2hG3CIsK~tW5T5aOB}5`gPTBuRu`^bQ=R(i>gw$6Om6Fz z>Sr!Ud}?3Q!8+UTaa_o$9*Kw+={-}Lx9X;AgwCnm8jc@) zI|UyfT+-;-Tqd-fC32!EXc5VzeQHzdYyxY3FG+v)=&UHa_ubI<{hThj?=Kye+Qq9#jKXp>Os`+$D2ZDM9KZhQ(YtSM4@)U&~E&$TbElcVLQDEz28 zu)16rns$!3gVRoc(3!*S{Kuc2oxL@hQTe)&O6G!v&z?PPUc=YD&$ju+k)9y;`x`sW z=KdeIC z&qv)2yt<7m%OXtpTIXHG`s9eej0IcD41PpRQTV|&zeG~jNDe%wqh9DWLDY3(+l31O z6BR$rum5)u?DY#v7JewO|NByAU5m;T%jT#!zZaJmPV?YP&5PUT*SIUMtm~A*|7PPr z@egmW&Yw8>{|m8TW&P-h@8WZ8gswdc+puYaTSK&SX$#*(J|+2IBC#7UJeTy!ItRJR)q1d*9h{C+eJ)LyKEFTy+udXLf?bulyk=i|uwd7VyZgd+ zPGr}dJ(q)J>YnzW5!DZ~Tb9}he2EpKIHt0m>#sj;L|kS>F*>V>08(qoW7mGJCX~^%k?MuMeR;Ch|^7; z)b?nxp628fPvh(t(X73%LtJ08$*p*nS8;jw%9i>2zHGnyOExyU#HlLJ>YRIg+uWtP zb-Q<8ui%byJI@>Y`rx@se0^cxIy}0Myv`E6?syjVNgb@H)1-iyoq_;a}a3Er)pf8f0Nw(k4q z-y1N0oY`5^I9=~R*o0lnCW)#ZI2vJ)sgXN>mTd^@&(|9!e}8wkyOmp9Np0DKN$OM6 z|6N@jUi#u_MX0Z@6xaC{y#v3B!VUc5^{W`CExTD~u#7jWQrfO?N=(cAeNXmt{gW%X 
z^5N;zeA9!0@9O`r_$>3MG~a&KE^{~8=}{ei{RbCKKK*e?zQDXSH(me4{7Jjuy)iuC z@=0+$v$lBmWCo?_dlP$hzPzmd_~5j*OW%5hzO}8<<+ZK)a^TU??u(0Dxo1fK^p2E& zsPBJift3DFn>qU>o3p=v<<5?=S|}VdS1#6i9Y5a{x4zkPmCRP%+I8sODY=)HFL%rp zX}a?#CcxzA`|@vH%1vwb=*;Qgx#s;A-%EL3t3P_2^K0yF<-HbjV@2JwKYZ^Go?Z7M zR#of2fThaLAl{I^>ccAB;isG-MW%`Og1BjI3!U4OPEXUFRjt2Vsz`yib@{w`Gn>uB zWiCnbinC{b$qE<#crvoX^UQ%AjEoz$UDBNs{Ibiy$~G+KWo~w+_9C?ttG-_U``0Qt zQjYh8S7KtCVh8Kp5U)m4`bdluGYAlO;7b?u_B8(!uKa4nRL zSik*RZr6L>&?i0DWJ5M|c}*8})?-f&6!BGkpqTA_cvtD`2Tz`~+}m4y_}H_H*9%g= zW?pcMcNx-?&#pf2SDeR3^#2S!?oJ?P%H&Wp#V^B|_^pmMWd@T9jc++cxg8cS>bR8^}iNlCnZQ6QjcWuI@&mAj!~56_=C zdt~a4Y=!Gr*jG*j4Si*EnzQPC*;K3XFR1YJk2L+2>jRc2%X2x;y_P1l?ntcr@~qs6 z6KA)6@2^#1sMvavJIh{JNkO08rhm?!oGst77&qAkZ1HK>ZU1jyIYVep%m=5(4`+OM zZ*s9L>osRY)wAM&I9?^~-S6HUY!OR7y8pe}eeslb9lig%KTY1St0&LKY1@MEut?2J zM+t5~prQ~ynk**?pdNxJIMo3j#E*1zmhocnr8 z^gNE-XL+lv-o5MV=+)h^r(5sat?K;wALcK(Bm5+LgZh*Bhw*|kvAW9>sg(vAr~Dl3bM(@hZO2qeCqV`INnowoY$ym|EqI5Waq_vNWHn< z@Yav&uKMF@sa?r^ujE`mi%C3LX!P1e-qj=imWW@~mxG!mU*D?y;nmPTa+^JEb^7&{ zQPwh*1&e|r40tv!nP2|@h`?QmJ^Z!b+7GfTPUe&31~o8JlzMjYHNV@nLvmJ!pKq-| zjk%n6z1{k3@u;iIC-}>oR~PPgbIKA+>*Z@n7d@MPJE~M~vR7MJOUhb>;(f1Mp@ z$gF(kOhRT_V{qW}0;5lBIz$xf8}m%9x_3SKu;b}7znKe?ekB@Y8rVMDWq0wyrbVwm zKMjlLtT=H?Fer~Zs>a64X5Iy{;3~tv#cDj~_^uh48z1JczweOrYO(i%EpOdtSC#y{ zwle$TqjkKeQY;e-f>TnSa(%kAVw<*1&e|<6-ONg-DEwd%i!|?u2q@fAB=F?$)nlyT zkMa*3KF2%dbJFp-_6Cd3rqtv-(LUe1vBF2Hr*T!{?~txF5@pwxeqY97GsDE=2CwK$ z6Or~-50> zbnfpQ0gETQ6wK|}Kl|5oU7L6I;;Gd;J9aEExqUzOuj`MRJu)9;G@>5eUOoHcoc{s= z_1`BQ7MMS?VV2+3hdFOs?4ACJJzP}c&fyxuvg0TB{E2m?)m`gcXr>RUAvjn_Fag2GV8?2v(9nKe22%QJ1eCgySajB8F3 zo86ypJuFkc=Knf&KrLePUWMDrOV2HB3T1Q=XbdeLE&AYbnOwc^-K7 zY}{M(2{Y%eHoutQy28eF#ecoadsW}}yU#EbJ2hEUjd$1SJ-b*Zv)yf(&(Sh>t!iIP z&w-oLhC2kOsW8?aHp#txVzbAOmtCH73xE2_nEff6`iIA`xi`08KW+uIW9c}F-NIX}H@ZnbFD zX5F-P_I)RQl<3rLHnx#jD>#2n=iByoi((tZnX<&6@F;lq^iFL(u_ZFHOGsp)=JlqJ z26OHiJl!?(QKZ)mX&)(-H!Nzr$HePSWq3aI^4QaHPoZdI<$dLNHqO)OSC;YCtnOHS zCiLk`vwbgaZc|%USLgC)RsQiP37IGD)e|)%K0UUq2-$de%EPm*qRl7RWZzbVZ*8BN 
z%D6KiR%kWXZj)CJc<<~K5?R=|JSMHYr~TvCHqGmoC!7yDbT8qOLgLYxCR5gbxcv3O zMo-BNn+(K{%dOrPBXVxSl$DDdNoSFDGV}Tz#TmU}M2`|9`2o&(p)< zw-27aDmVMzj+2YCpDhwUUjRsOC${4_}Ky+E)lEek|Fl{rQ|g@PoxHt{Gf1 zA?FipPKD>sC|X-uJ5A^t+hn%b@T~#0YP_Gs>$*OiID1yyopWpLoWd86n6G>-Jht|O z${R~bt;DaV*K5YgxY>!WGFsvtn`2wx{@2*GPi0xk%DE1jv!3h?T+4beYU@{_TV~&m zJXo;n5a*Zg$%-dVopfHmZL5ohZtDiQT^|_kMqdq>+&ejZaf@n8jrA8zuYi_cXT0y5 z^|b%_+O{uSd+S_~gMN5sh%+zf|Imw|a4ZCtt^owKr-U ze-!sr9+TfUug}BtyqKMi@2edPeU<$u&i!(ySHdymgyYn@Wic{_oimkecbxh?f7Na?&ngK* zX5LgVwtw2aqr_V7c$4zc`^;IpA5~>c`tfSrn^uqOcRQL+pIafq-@f$p1va}taqoJq z9pX!8&X?$~8+N_}cORO$%(>;yc_Zo@$ z8c#5tS#8{@>m$wm@sO z?w%)4nMF7nT~uTPDp#&pIU{aCptAHX%axPMPRnH_B`^D_b=1qiJ9xvy*UIz5&tA#a zG|<|&uH(ls=~IWiX)v z-;QQasefxW^^V}CivrHP(ax?;1+6!3-!xD(=E=QLryMuEy({}8GGd3oo31JGOYdH6`exg?^O8XF|M1#&%Lj@}9{yh} zFD~x>?b|mIAt5JWb-yJuX3Vhq>g3Tpp>5WbBQk3H5_daV6gcVC|Cn(0U5D|G$SAj+ zuG=+rcXt#7ZO=_|y(icAChX+yvf0gwu2WhazW1Fg`4r=N(D2HS7jOTY>`oE5!TmA+ zyNKcq(QB)W_s2c&k(wY_!{&NIK)5OBb?*wn-Io$ha&Z+r7KvkF&A(OOXcP0s!a?j8 zldVIPv5tX)UWdJ7PIqv(+LhDW4?S%TNl3Z(BF3im=>E^f^YV0mSf9yV_vfl|9$&?Z zX(m4N?dDFJH0i|2lbu3BLSK^S&Y9y={OrubB+F)Y{$=0qRrlZj{q1(XriMnxp1pfB zFD>!hl5nue>Z`Ly^8_~232j!Qe+BItE`O2#!oIa=ld7~fx7ow_`yyOg&Fj=RZCDV! 
zG_^!Sxj#l`)~ED>P+&uRl<8D(lxv5OZOFKl78*ZhPrmMc%0| zX4nWAnVL?`$<57tb!BDMr4Jt-I&<;zE;Y@*Hle$ln^)4PWzpirCof-~{OIxHo0kM+ zWOSAs>@YneGv#hap5)h#gg{X%a56f{&Z^g>I@-a{spE@XMc5Ds0-LZkaFq*la!db+GTwf|ye; zZ9Zkb_HcH#x%rjdoA-{BhhyH`oa3blo9cG@UArb`vf;XM&dQsGU&8qo9W}m?9`SA3 z(ey%tk^+ZpHPfBf-v_ysDTeo7P5qzpr>NKa{OcIM)bFQWE`PK7%H`{!@)No*{oYY< z{^&-@@9en;22J2E?Z zdX}j9&I&O?S|!5z(|XrhTo75G zZ~en~zE9w*-FtW2-Za~qw%vR4v1>k2^WMJjd0jStd2%G{m4kQZHA`+3)VVqBpr~1* zZRXpY)9NXLjt3-I9Hx|h4ftTQsY>kM?wW$I(~g~x6|j~V3KlY_-d)6D;@aotM!x5|G7`r{d=pnwJ`Tww{}<9QPub* zm#n|EZX2(Im@K;nBB2{4zbXK!Zvce`~uwBt53}mX!AeV)U@S}_s{J& z{`Y%K_>*03;UIdj)gV?mqx61#vF7LLuWQ^neO_6_%GpeHkKHrpMpRQ%)Q8v3Y&N&s z&fZy}+LXgPMY+9FT1<5AgDu@XGJ=i{?G4L3nC}R8i|Hfh!mW&HfghclC}^}Fvh6g|SMRf+=^63^JAf3eS{UQE#YnWb)tl z3Ubkt^om{EIwkM#6(DXWqdb^@1XBnn09&!*llmpl8gk^j{rH;yTDb}uyQ*p~l1Oy*67#t9C`Io;P>w=QAKoP4}bcJZS} zNm(~Hsn&it$UY~j^1%Vd+Ec2V)Jrs{AJb?81*4PHt9h1Dw^UxcIde%#CZ%ZIb>sB9=i`kTFctX%y@EV@Aro6 z-~~z=EMK+K*4iCp+|CiwvXpK6X*Idi3k4V6D|vh^wy$*Nm-Eh7j$ZE6KOy~VkNDa9 zKZLidXZhrxI;)*HU6`xR|0vGZwAN2>kdyXQ|_}Hosq`<;!c?rut=Ha!rwJ zOt9@+G0nt=Pvg@X1qBWcrltl31r8famU$iuPbR-!_h%DARHD0K0$cLL$gf!I634MuT@bE6n6%!2r=r4?n+a;vG&0R~*+ANYz=etVYk?e*St1eJw2| zZ*T8)ODvO)aO|r5tmZq%Vq(5xpU{tCB4gjOpM;1cdKUtaFNe901(qN1Wp;g3zz*524vs(oIYL+dx6l*tK^S9ccd4-^)Q5dZf6 zzQ387*^`G45B^F_Pd~nYcg{_tUk}^mE9M@Z^Xm3?{r&5<+OpS%@n#-y6O@uVwRA;n zObkb;>SA!kx7l&3uV;MO*H@uxJU4#i-{0rEe%~)G-MBq7>i&FmFDfWlaB;EwXInW{ zRaKM9Pbuqm{k&U#|6}%YzqyO__x)%x%fGi~iJqxP^a5r{Ny(6?s8ijyTR4Ry%-Q&4 zJU|Q6ADiafFu1iLk$J9t{XDz>e~NEyOlE(3b8~xVbJUZ^{r1b`Y^w|!t3Etnj5p@D z*|I{Km6i3&uh;7@pPOr48QkE(99U;ub9^{Sk(QoI6KR<`zx2qp78Z?zf6zL zQS&d=;kn&cxjFl~o?7VD?~?)-yM6r1uD~00r#K`z^8my0B}+_he7*H-$)5~upAQP` z5j;*#PA`sh3jfke2uj|Rys_c*vgqx3f9~DR-|ri|%qMa3-Y6Co2DRWB&%)P69aVG7 z5xc<8;lZk7aNyG5fkIL+Cfw2H?ioaXj>^h_`P{JfeT4%51756`796#8H(jD=XgJZIM7VW*weVxV6cZ127_xI;#MROeMk#zoHtio#$`s>%PYroB>{$TjC^@HJ|MNLO} zr?LI`xY6*yoE6Hv3Vt6ye%zz*z^Y6(Q^FvDVYb5Cw{NH3*ju%g^WDtran-$Md3QYK zT9qcr6dY=+UbW2E>Fq8?aaq6i`!%0=XJ4}a^TGL0Me=21l_^=*d)-xex4t|)QQ7^) 
zzrVjjJ)AeEo&CX(louqO8L=saGc`5!;v(1X9o`%6l)b&Rl`HeWL7r=#y6-GkDe@Xv zCr{YO>9FO|yoU8s!ZPY7w%@Dj{+;)t?1%C9mW7#@?x@xHzIEl{KEcp5_0*|T7Uven z@2|7jD1Vft_ko&DSC+t$T@5C0Zg213%`E@!)6>(M>gw+ACtaG9_aLA#Sdpnwu^>3% zEJNajNqHgnHD2!yYv_5*FRi&`*Xhf1p85GMVf-jL+bmZpVyVFIpv!TKin9+v!a}7i;}(uabb!BG@O~P4qmJ*6@>wo99kN)Fs{>h7b1{^R%q#x07IVdbMeaK*ilfuH40sxbIwh6>-4J zvix04-_L`yRtd){s9f_pa%|7MhOe{F~&3{?(ZtHI`Tx)wbl}r>Cdq zgs}^o8mT+1TC;ZTP4n&YC9ehTd# z+grW)!q|`O$Z$2Ez{#NPSXgMtZhttIxm+#A^lPNHgOJ0qjrV3`pGx(OP*^LpPCV8} zm}yDjf`$jS0)8+2zE1JheQSBZDRhaWZuj0V&H|tc(9uC4K#9poYKn!{B{g_;*2uw; z8Sr3%$7E=w$ik?qq_D)XbFosc5UipU>1de1#>&@a;yHK>I`E(2uhM_*s4c1U7#J8B NJYD@<);T3K0RSI>br=8u diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD deleted file mode 100644 index 6e4a8d71ba..0000000000 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ /dev/null @@ -1,160 +0,0 @@ -package(default_visibility = ["//visibility:private"]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -load("//tensorflow:tensorflow.bzl", "py_test") - -py_test( - name = "estimator_test", - srcs = ["estimator_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/kfac/python/ops:fisher_estimator", - "//tensorflow/contrib/kfac/python/ops:layer_collection", - "//tensorflow/contrib/kfac/python/ops:utils", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:init_ops", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:random_ops", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - "//third_party/py/numpy", - ], -) - -py_test( - name = "fisher_factors_test", - srcs = ["fisher_factors_test.py"], - srcs_version = "PY2AND3", - deps = [ - 
"//tensorflow/contrib/kfac/python/ops:fisher_blocks", - "//tensorflow/contrib/kfac/python/ops:fisher_factors", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:random_seed", - "//tensorflow/python:variables", - "//third_party/py/numpy", - ], -) - -py_test( - name = "fisher_blocks_test", - srcs = ["fisher_blocks_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/kfac/python/ops:fisher_blocks", - "//tensorflow/contrib/kfac/python/ops:layer_collection", - "//tensorflow/contrib/kfac/python/ops:linear_operator", - "//tensorflow/contrib/kfac/python/ops:utils", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:random_ops", - "//tensorflow/python:random_seed", - "//tensorflow/python:state_ops", - "//tensorflow/python:variables", - "//third_party/py/numpy", - ], -) - -py_test( - name = "layer_collection_test", - srcs = ["layer_collection_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/kfac/python/ops:fisher_blocks", - "//tensorflow/contrib/kfac/python/ops:fisher_factors", - "//tensorflow/contrib/kfac/python/ops:layer_collection", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:random_ops", - "//tensorflow/python:random_seed", - "//tensorflow/python:variable_scope", - ], -) - -py_test( - name = "optimizer_test", - srcs = ["optimizer_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/kfac/python/ops:fisher_factors", - "//tensorflow/contrib/kfac/python/ops:kfac_optimizer", - 
"//tensorflow/contrib/kfac/python/ops:layer_collection", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_ops", - "//tensorflow/python:init_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:nn", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - "//third_party/py/numpy", - ], -) - -py_test( - name = "utils_test", - srcs = ["utils_test.py"], - srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows - deps = [ - "//tensorflow/contrib/kfac/python/ops:utils", - "//tensorflow/contrib/tpu", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:random_seed", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - "//third_party/py/numpy", - ], -) - -py_test( - name = "op_queue_test", - srcs = ["op_queue_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/kfac/python/ops:op_queue", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - ], -) - -py_test( - name = "loss_functions_test", - srcs = ["loss_functions_test.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/kfac/python/ops:loss_functions", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:framework_ops", - "//tensorflow/python:random_ops", - "//third_party/py/numpy", - ], -) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py b/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py deleted file mode 100644 index 76b31a5730..0000000000 --- a/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py +++ /dev/null @@ -1,310 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tf.contrib.kfac.estimator.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.kfac.python.ops import estimator -from tensorflow.contrib.kfac.python.ops import layer_collection as lc -from tensorflow.contrib.kfac.python.ops import utils -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.platform import test -from tensorflow.python.training import training_util - -_ALL_ESTIMATION_MODES = ["gradients", "empirical", "curvature_prop", "exact"] - - -class EstimatorTest(test.TestCase): - - def setUp(self): - self._graph = ops.Graph() - with self._graph.as_default(): - self.layer_collection = lc.LayerCollection() - - self.inputs = random_ops.random_normal((2, 2), dtype=dtypes.float32) - self.weights = variable_scope.get_variable( - "w", shape=(2, 2), dtype=dtypes.float32) - self.bias = 
variable_scope.get_variable( - "b", initializer=init_ops.zeros_initializer(), shape=(2, 1)) - self.output = math_ops.matmul(self.inputs, self.weights) + self.bias - - # Only register the weights. - self.layer_collection.register_fully_connected( - params=(self.weights,), inputs=self.inputs, outputs=self.output) - - self.outputs = math_ops.tanh(self.output) - self.targets = array_ops.zeros_like(self.outputs) - self.layer_collection.register_categorical_predictive_distribution( - logits=self.outputs, targets=self.targets) - - def testEstimatorInitManualRegistration(self): - with self._graph.as_default(): - # We should be able to build an estimator for only the registered vars. - estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - cov_ema_decay=0.1, - damping=0.2, - layer_collection=self.layer_collection - ) - - # Check that we throw an error if we try to build an estimator for vars - # that were not manually registered. - with self.assertRaises(ValueError): - est = estimator.FisherEstimatorRoundRobin( - variables=[self.weights, self.bias], - cov_ema_decay=0.1, - damping=0.2, - layer_collection=self.layer_collection - ) - est.make_vars_and_create_op_thunks() - - # Check that we throw an error if we don't include registered variables, - # i.e. 
self.weights - with self.assertRaises(ValueError): - est = estimator.FisherEstimatorRoundRobin( - variables=[], - cov_ema_decay=0.1, - damping=0.2, - layer_collection=self.layer_collection) - est.make_vars_and_create_op_thunks() - - @test.mock.patch.object(utils.SubGraph, "variable_uses", return_value=42) - def testVariableWrongNumberOfUses(self, mock_uses): - with self.assertRaises(ValueError): - est = estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - cov_ema_decay=0.1, - damping=0.2, - layer_collection=self.layer_collection) - est.make_vars_and_create_op_thunks() - - def testInvalidEstimationMode(self): - with self.assertRaises(ValueError): - est = estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - cov_ema_decay=0.1, - damping=0.2, - layer_collection=self.layer_collection, - estimation_mode="not_a_real_mode") - est.make_vars_and_create_op_thunks() - - def testGradientsModeBuild(self): - with self._graph.as_default(): - est = estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - cov_ema_decay=0.1, - damping=0.2, - layer_collection=self.layer_collection, - estimation_mode="gradients") - est.make_vars_and_create_op_thunks() - - def testEmpiricalModeBuild(self): - with self._graph.as_default(): - est = estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - cov_ema_decay=0.1, - damping=0.2, - layer_collection=self.layer_collection, - estimation_mode="empirical") - est.make_vars_and_create_op_thunks() - - def testCurvaturePropModeBuild(self): - with self._graph.as_default(): - est = estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - cov_ema_decay=0.1, - damping=0.2, - layer_collection=self.layer_collection, - estimation_mode="curvature_prop") - est.make_vars_and_create_op_thunks() - - def testExactModeBuild(self): - with self._graph.as_default(): - est = estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - cov_ema_decay=0.1, - damping=0.2, - layer_collection=self.layer_collection, - 
estimation_mode="exact") - est.make_vars_and_create_op_thunks() - - def test_cov_update_thunks(self): - """Ensures covariance update ops run once per global_step.""" - with self._graph.as_default(), self.cached_session() as sess: - fisher_estimator = estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - layer_collection=self.layer_collection, - damping=0.2, - cov_ema_decay=0.0) - - # Construct an op that executes one covariance update per step. - global_step = training_util.get_or_create_global_step() - (cov_variable_thunks, cov_update_op_thunks, _, - _) = fisher_estimator.create_ops_and_vars_thunks() - for thunk in cov_variable_thunks: - thunk() - cov_matrices = [ - fisher_factor.get_cov() - for fisher_factor in self.layer_collection.get_factors() - ] - cov_update_op = control_flow_ops.case( - [(math_ops.equal(global_step, i), thunk) - for i, thunk in enumerate(cov_update_op_thunks)]) - increment_global_step = global_step.assign_add(1) - - sess.run(variables.global_variables_initializer()) - initial_cov_values = sess.run(cov_matrices) - - # Ensure there's one update per covariance matrix. - self.assertEqual(len(cov_matrices), len(cov_update_op_thunks)) - - # Test is no-op if only 1 covariance matrix. - assert len(cov_matrices) > 1 - - for i in range(len(cov_matrices)): - # Compare new and old covariance values - new_cov_values = sess.run(cov_matrices) - is_cov_equal = [ - np.allclose(initial_cov_value, new_cov_value) - for (initial_cov_value, - new_cov_value) in zip(initial_cov_values, new_cov_values) - ] - num_cov_equal = sum(is_cov_equal) - - # Ensure exactly one covariance matrix changes per step. - self.assertEqual(num_cov_equal, len(cov_matrices) - i) - - # Run all covariance update ops. 
- sess.run(cov_update_op) - sess.run(increment_global_step) - - def test_round_robin_placement(self): - """Check if the ops and variables are placed on devices correctly.""" - with self._graph.as_default(): - fisher_estimator = estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - layer_collection=self.layer_collection, - damping=0.2, - cov_ema_decay=0.0, - cov_devices=["/cpu:{}".format(i) for i in range(2)], - inv_devices=["/cpu:{}".format(i) for i in range(2)]) - - # Construct an op that executes one covariance update per step. - (cov_update_thunks, - inv_update_thunks) = fisher_estimator.make_vars_and_create_op_thunks( - scope="test") - cov_update_ops = tuple(thunk() for thunk in cov_update_thunks) - inv_update_ops = tuple(thunk() for thunk in inv_update_thunks) - self.assertEqual(cov_update_ops[0].device, "/device:CPU:0") - self.assertEqual(cov_update_ops[1].device, "/device:CPU:1") - self.assertEqual(inv_update_ops[0].device, "/device:CPU:0") - self.assertEqual(inv_update_ops[1].device, "/device:CPU:1") - cov_matrices = [ - fisher_factor.get_cov() - for fisher_factor in self.layer_collection.get_factors() - ] - inv_matrices = [ - matrix - for fisher_factor in self.layer_collection.get_factors() - for matrix in fisher_factor._matpower_by_exp_and_damping.values() - ] - self.assertEqual(cov_matrices[0].device, "/device:CPU:0") - self.assertEqual(cov_matrices[1].device, "/device:CPU:1") - # Inverse matrices need to be explicitly placed. - self.assertEqual(inv_matrices[0].device, "") - self.assertEqual(inv_matrices[1].device, "") - - def test_inv_update_thunks(self): - """Ensures inverse update ops run once per global_step.""" - with self._graph.as_default(), self.cached_session() as sess: - fisher_estimator = estimator.FisherEstimatorRoundRobin( - variables=[self.weights], - layer_collection=self.layer_collection, - damping=0.2, - cov_ema_decay=0.0) - - # Construct op that updates one inverse per global step. 
- global_step = training_util.get_or_create_global_step() - (cov_variable_thunks, _, inv_variable_thunks, - inv_update_op_thunks) = fisher_estimator.create_ops_and_vars_thunks() - for thunk in cov_variable_thunks: - thunk() - for thunk in inv_variable_thunks: - thunk() - inv_matrices = [ - matrix - for fisher_factor in self.layer_collection.get_factors() - for matrix in fisher_factor._matpower_by_exp_and_damping.values() - ] - inv_update_op = control_flow_ops.case( - [(math_ops.equal(global_step, i), thunk) - for i, thunk in enumerate(inv_update_op_thunks)]) - increment_global_step = global_step.assign_add(1) - - sess.run(variables.global_variables_initializer()) - initial_inv_values = sess.run(inv_matrices) - - # Ensure there's one update per inverse matrix. This is true as long as - # there's no fan-in/fan-out or parameter re-use. - self.assertEqual(len(inv_matrices), len(inv_update_op_thunks)) - - # Test is no-op if only 1 invariance matrix. - assert len(inv_matrices) > 1 - - # Assign each covariance matrix a value other than the identity. This - # ensures that the inverse matrices are updated to something different as - # well. - cov_matrices = [ - fisher_factor.get_cov() - for fisher_factor in self.layer_collection.get_factors() - ] - sess.run([ - cov_matrix.assign(2 * linalg_ops.eye(int(cov_matrix.shape[0]))) - for cov_matrix in cov_matrices - ]) - - for i in range(len(inv_matrices)): - # Compare new and old inverse values - new_inv_values = sess.run(inv_matrices) - is_inv_equal = [ - np.allclose(initial_inv_value, new_inv_value) - for (initial_inv_value, - new_inv_value) in zip(initial_inv_values, new_inv_values) - ] - num_inv_equal = sum(is_inv_equal) - - # Ensure exactly one inverse matrix changes per step. - self.assertEqual(num_inv_equal, len(inv_matrices) - i) - - # Run all inverse update ops. 
- sess.run(inv_update_op) - sess.run(increment_global_step) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py deleted file mode 100644 index f845def507..0000000000 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py +++ /dev/null @@ -1,1018 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for tf.contrib.kfac.fisher_blocks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.kfac.python.ops import fisher_blocks as fb -from tensorflow.contrib.kfac.python.ops import fisher_factors as ff -from tensorflow.contrib.kfac.python.ops import layer_collection as lc -from tensorflow.contrib.kfac.python.ops import linear_operator as lo -from tensorflow.contrib.kfac.python.ops import utils -from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variables as tf_variables -from tensorflow.python.platform import test - - -# We need to set these constants since the numerical values used in the tests -# were chosen when these used to be the defaults. -ff.set_global_constants(init_covariances_at_zero=False, - zero_debias=False, - init_inverses_at_zero=False) - -# TODO(b/78538100): As far as I can tell, all the tests that say "Make sure our -# inverse is something other than the identity" are actually broken. They never -# run the covariance update ops and so the inverse actually is the identity -# (possible plus the damping term, which would still make it a multiple of the -# identity). - - -def _make_psd(dim): - """Constructs a PSD matrix of the given dimension.""" - mat = np.ones((dim, dim), dtype=np.float32) - mat[np.arange(dim), np.arange(dim)] = 2. 
+ np.arange(dim) - return array_ops.constant(mat) - - -class UtilsTest(test.TestCase): - - def testComputePiTracenorm(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - diag = ops.convert_to_tensor([1., 2., 0., 1.]) - left_factor = lo.LinearOperatorDiag(diag) - right_factor = lo.LinearOperatorFullMatrix(array_ops.ones([2, 2])) - - # pi is the sqrt of the left trace norm divided by the right trace norm - pi = fb.compute_pi_tracenorm(left_factor, right_factor) - - pi_val = sess.run(pi) - self.assertEqual(1., pi_val) - - -class FullFBTest(test.TestCase): - - def testFullFBInitSingleTensor(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - - self.assertAllEqual(params, block.tensors_to_compute_grads()) - - def testFullFBInitTensorTuple(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - - self.assertAllEqual(params, block.tensors_to_compute_grads()) - - def testInstantiateFactors(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - - grads = (params[0]**2, math_ops.sqrt(params[1])) - block.instantiate_factors(grads, 0.5) - - def testMultiplyInverseTuple(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - grads = (params[0]**2, math_ops.sqrt(params[1])) - 
block.instantiate_factors((grads,), 0.5) - block._factor.instantiate_cov_variables() - block.register_inverse() - block._factor.instantiate_inv_variables() - - # Make sure our inverse is something other than the identity. - sess.run(tf_variables.global_variables_initializer()) - sess.run(block._factor.make_inverse_update_ops()) - - vector = array_ops.ones(3,) * 2 - output = block.multiply_inverse(vector) - - self.assertAllClose(sess.run(vector * 2 / 3.), sess.run(output)) - - def testMultiplyInverseNotTuple(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = array_ops.constant([[1.], [2.]]) - block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - grads = params**2 - block.instantiate_factors((grads,), 0.5) - block._factor.instantiate_cov_variables() - block.register_inverse() - block._factor.instantiate_inv_variables() - - # Make sure our inverse is something other than the identity. - sess.run(tf_variables.global_variables_initializer()) - sess.run(block._factor.make_inverse_update_ops()) - - vector = array_ops.ones(2,) * 2 - output = block.multiply_inverse(vector) - - self.assertAllClose(sess.run(vector * 2 / 3.), sess.run(output)) - - def testMultiplyInverseAgainstExplicit(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.FullFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - grads = (array_ops.constant([2., 3.]), array_ops.constant(4.)) - damping = 0.5 - block.instantiate_factors((grads,), damping) - block._factor.instantiate_cov_variables() - block.register_inverse() - block._factor.instantiate_inv_variables() - - # Make sure our inverse is something other than the identity. 
- sess.run(state_ops.assign(block._factor._cov, _make_psd(3))) - sess.run(block._factor.make_inverse_update_ops()) - - v_flat = np.array([4., 5., 6.], dtype=np.float32) - vector = utils.column_to_tensors(params, array_ops.constant(v_flat)) - output = block.multiply_inverse(vector) - output_flat = sess.run(utils.tensors_to_column(output)).ravel() - - full = sess.run(block.full_fisher_block()) - explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat) - - self.assertAllClose(output_flat, explicit) - - -class NaiveDiagonalFBTest(test.TestCase): - - def testNaiveDiagonalFBInitSingleTensor(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - - self.assertAllEqual(params, block.tensors_to_compute_grads()) - - def testNaiveDiagonalFBInitTensorTuple(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - - self.assertAllEqual(params, block.tensors_to_compute_grads()) - - def testInstantiateFactors(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - - grads = (params[0]**2, math_ops.sqrt(params[1])) - block.instantiate_factors(grads, 0.5) - - def testMultiplyInverseTuple(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - grads = (params[0]**2, math_ops.sqrt(params[1])) - 
block.instantiate_factors((grads,), 0.5) - block._factor.instantiate_cov_variables() - - # Make sure our inverse is something other than the identity. - sess.run(tf_variables.global_variables_initializer()) - sess.run(block._factor.make_inverse_update_ops()) - - vector = array_ops.ones(3,) * 2 - output = block.multiply_inverse(vector) - - self.assertAllClose(sess.run(vector * 2 / 3.), sess.run(output)) - - def testMultiplyInverseNotTuple(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = array_ops.constant([[1.], [2.]]) - block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - grads = params**2 - block.instantiate_factors((grads,), 0.5) - block._factor.instantiate_cov_variables() - - # Make sure our inverse is something other than the identity. - sess.run(tf_variables.global_variables_initializer()) - sess.run(block._factor.make_inverse_update_ops()) - vector = array_ops.ones(2,) * 2 - output = block.multiply_inverse(vector) - - self.assertAllClose(sess.run(vector * 2 / 3.), sess.run(output)) - - def testMultiplyInverseAgainstExplicit(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = (array_ops.constant([1., 2.]), array_ops.constant(3.)) - block = fb.NaiveDiagonalFB(lc.LayerCollection(), params) - block.register_additional_tower(32) - grads = (params[0]**2, math_ops.sqrt(params[1])) - damping = 0.5 - block.instantiate_factors((grads,), damping) - block._factor.instantiate_cov_variables() - - cov = array_ops.reshape(array_ops.constant([2., 3., 4.]), [-1, 1]) - sess.run(state_ops.assign(block._factor._cov, cov)) - sess.run(block._factor.make_inverse_update_ops()) - - v_flat = np.array([4., 5., 6.], dtype=np.float32) - vector = utils.column_to_tensors(params, array_ops.constant(v_flat)) - output = block.multiply_inverse(vector) - output_flat = sess.run(utils.tensors_to_column(output)).ravel() - - 
full = sess.run(block.full_fisher_block()) - explicit = np.dot(np.linalg.inv(full + damping * np.eye(3)), v_flat) - self.assertAllClose(output_flat, explicit) - - -class FullyConnectedDiagonalFBTest(test.TestCase): - - def setUp(self): - super(FullyConnectedDiagonalFBTest, self).setUp() - - self.batch_size = 4 - self.input_size = 6 - self.output_size = 3 - - self.inputs = np.random.randn(self.batch_size, self.input_size).astype( - np.float32) - self.outputs = np.zeros([self.batch_size, self.output_size]).astype( - np.float32) - self.output_grads = np.random.randn(self.batch_size, - self.output_size).astype(np.float32) - self.w = np.random.randn(self.input_size, self.output_size).astype( - np.float32) - self.b = np.random.randn(self.output_size).astype(np.float32) - - def fisherApprox(self, has_bias=False): - """Fisher approximation using default inputs.""" - if has_bias: - inputs = np.concatenate( - [self.inputs, np.ones([self.batch_size, 1])], axis=1) - else: - inputs = self.inputs - return self.buildDiagonalFisherApproximation(inputs, self.output_grads) - - def buildDiagonalFisherApproximation(self, inputs, output_grads): - """Builds explicit diagonal Fisher approximation. - - Fisher's diagonal is (d loss / d w)'s elements squared for - d/dw = E[outer(input, output_grad)] - - where the expectation is taken over examples. - - Args: - inputs: np.array of shape [batch_size, input_size]. - output_grads: np.array of shape [batch_size, output_size]. - - Returns: - Diagonal np.array of shape [num_params, num_params] for num_params = - input_size * output_size. 
- """ - batch_size = inputs.shape[0] - assert output_grads.shape[0] == batch_size - input_size = inputs.shape[1] - output_size = output_grads.shape[1] - fisher_diag = np.zeros((input_size, output_size)) - for i in range(batch_size): - fisher_diag += np.square(np.outer(inputs[i], output_grads[i])) - return np.diag(fisher_diag.flatten()) / batch_size - - def testMultiply(self): - result, _ = self.runFisherBlockOps(self.w, [self.inputs], [self.outputs], - [self.output_grads]) - - # Construct Fisher-vector product. - expected_result = self.fisherApprox().dot(self.w.flatten()) - expected_result = expected_result.reshape( - [self.input_size, self.output_size]) - - self.assertAllClose(expected_result, result) - - def testMultiplyInverse(self): - _, result = self.runFisherBlockOps(self.w, [self.inputs], [self.outputs], - [self.output_grads]) - - # Construct inverse Fisher-vector product. - expected_result = np.linalg.inv(self.fisherApprox()).dot(self.w.flatten()) - expected_result = expected_result.reshape( - [self.input_size, self.output_size]) - - self.assertAllClose(expected_result, result) - - def testRegisterAdditionalTower(self): - """Ensure 1 big tower and 2 small towers are equivalent.""" - multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps( - self.w, [self.inputs], [self.outputs], [self.output_grads]) - multiply_result_small, multiply_inverse_result_small = ( - self.runFisherBlockOps(self.w, np.split(self.inputs, 2), - np.split(self.outputs, 2), - np.split(self.output_grads, 2))) - - self.assertAllClose(multiply_result_big, multiply_result_small) - self.assertAllClose(multiply_inverse_result_big, - multiply_inverse_result_small) - - def testMultiplyHasBias(self): - result, _ = self.runFisherBlockOps((self.w, self.b), [self.inputs], - [self.outputs], [self.output_grads]) - expected_result = self.fisherApprox(True).dot( - np.concatenate([self.w.flatten(), self.b.flatten()])) - expected_result = expected_result.reshape( - [self.input_size + 1, 
self.output_size]) - expected_result = (expected_result[:-1], expected_result[-1]) - - self.assertEqual(len(result), 2) - self.assertAllClose(expected_result[0], result[0]) - self.assertAllClose(expected_result[1], result[1]) - - def runFisherBlockOps(self, params, inputs, outputs, output_grads): - """Run Ops guaranteed by FisherBlock interface. - - Args: - params: Tensor or 2-tuple of Tensors. Represents weights or weights and - bias of this layer. - inputs: list of Tensors of shape [batch_size, input_size]. Inputs to - layer. - outputs: list of Tensors of shape [batch_size, output_size]. - Preactivations produced by layer. - output_grads: list of Tensors of shape [batch_size, output_size]. - Gradient of loss with respect to 'outputs'. - - Returns: - multiply_result: Result of FisherBlock.multiply(params) - multiply_inverse_result: Result of FisherBlock.multiply_inverse(params) - """ - with ops.Graph().as_default(), self.cached_session() as sess: - inputs = as_tensors(inputs) - outputs = as_tensors(outputs) - output_grads = as_tensors(output_grads) - params = as_tensors(params) - - block = fb.FullyConnectedDiagonalFB( - lc.LayerCollection(), has_bias=isinstance(params, (tuple, list))) - for (i, o) in zip(inputs, outputs): - block.register_additional_tower(i, o) - - block.instantiate_factors((output_grads,), damping=0.0) - block._factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - sess.run(block._factor.make_covariance_update_op(0.0)) - multiply_result = sess.run(block.multiply(params)) - multiply_inverse_result = sess.run(block.multiply_inverse(params)) - - return multiply_result, multiply_inverse_result - - -class EmbeddingKFACFBTest(test.TestCase): - - def testInstantiateFactors(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - - # Create a Fisher Block. - vocab_size = 5 - block = fb.EmbeddingKFACFB(lc.LayerCollection(), vocab_size) - - # Add some examples. 
- inputs = array_ops.constant([[0, 1], [1, 2], [2, 3]]) - outputs = array_ops.constant([[0.], [1.], [2.]]) - block.register_additional_tower(inputs, outputs) - - # Instantiate factor's variables. Ensure it doesn't fail. - grads = outputs**2. - damping = array_ops.constant(0.) - block.instantiate_factors(((grads,),), damping) - - def testMultiplyInverse(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - - # Create a Fisher Block. - vocab_size = 5 - block = fb.EmbeddingKFACFB(lc.LayerCollection(), vocab_size) - - # Add some examples. - inputs = array_ops.constant([[0, 1], [1, 2], [2, 3]]) - outputs = array_ops.constant([[0.], [1.], [2.]]) - block.register_additional_tower(inputs, outputs) - - # Instantiate factor's variables. Ensure it doesn't fail. - grads = outputs**2. - damping = array_ops.constant(0.) - block.instantiate_factors(((grads,),), damping) - block._input_factor.instantiate_cov_variables() - block._output_factor.instantiate_cov_variables() - block.register_inverse() - block._input_factor.instantiate_inv_variables() - block._output_factor.instantiate_inv_variables() - - # Create a sparse update. - indices = array_ops.constant([1, 3, 4]) - values = array_ops.constant([[1.], [1.], [1.]]) - sparse_vector = ops.IndexedSlices( - values, indices, dense_shape=[vocab_size, 1]) - dense_vector = array_ops.reshape([0., 1., 0., 1., 1.], [vocab_size, 1]) - - # Compare Fisher-vector product against explicit result. 
- result = block.multiply_inverse(sparse_vector) - expected_result = linalg_ops.matrix_solve(block.full_fisher_block(), - dense_vector) - - sess.run(tf_variables.global_variables_initializer()) - self.assertAlmostEqual( - sess.run(expected_result[1]), sess.run(result.values[0])) - self.assertAlmostEqual( - sess.run(expected_result[3]), sess.run(result.values[1])) - self.assertAlmostEqual( - sess.run(expected_result[4]), sess.run(result.values[2])) - - -class FullyConnectedKFACBasicFBTest(test.TestCase): - - def testFullyConnectedKFACBasicFBInit(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - inputs = array_ops.constant([1., 2.]) - outputs = array_ops.constant([3., 4.]) - block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection()) - block.register_additional_tower(inputs, outputs) - - self.assertAllEqual([outputs], block.tensors_to_compute_grads()) - - def testInstantiateFactorsHasBias(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - inputs = array_ops.constant([[1., 2.], [3., 4.]]) - outputs = array_ops.constant([[3., 4.], [5., 6.]]) - block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=True) - block.register_additional_tower(inputs, outputs) - - grads = outputs**2 - block.instantiate_factors(((grads,),), 0.5) - - def testInstantiateFactorsNoBias(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - inputs = array_ops.constant([[1., 2.], [3., 4.]]) - outputs = array_ops.constant([[3., 4.], [5., 6.]]) - block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) - block.register_additional_tower(inputs, outputs) - - grads = outputs**2 - block.instantiate_factors(((grads,),), 0.5) - - def testMultiplyInverseTuple(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - inputs = array_ops.constant([[1., 2., 3.], [3., 4., 5.], [5., 6., 7.]]) - outputs = array_ops.constant([[3., 4.], [5., 6.]]) - block = 
fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) - block.register_additional_tower(inputs, outputs) - grads = outputs**2 - block.instantiate_factors(((grads,),), 0.5) - - block._input_factor.instantiate_cov_variables() - block._output_factor.instantiate_cov_variables() - block.register_inverse() - block._input_factor.instantiate_inv_variables() - block._output_factor.instantiate_inv_variables() - - # Make sure our inverse is something other than the identity. - sess.run(tf_variables.global_variables_initializer()) - sess.run(block._input_factor.make_inverse_update_ops()) - sess.run(block._output_factor.make_inverse_update_ops()) - - vector = ( - np.arange(2, 6).reshape(2, 2).astype(np.float32), # - np.arange(1, 3).reshape(2, 1).astype(np.float32)) - output = block.multiply_inverse((array_ops.constant(vector[0]), - array_ops.constant(vector[1]))) - - output = sess.run(output) - self.assertAllClose([[0.686291, 1.029437], [1.372583, 1.715729]], - output[0]) - self.assertAllClose([0.343146, 0.686291], output[1]) - - def testMultiplyInverseNotTuple(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - inputs = array_ops.constant([[1., 2.], [3., 4.]]) - outputs = array_ops.constant([[3., 4.], [5., 6.]]) - block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) - block.register_additional_tower(inputs, outputs) - grads = outputs**2 - block.instantiate_factors(((grads,),), 0.5) - block._input_factor.instantiate_cov_variables() - block._output_factor.instantiate_cov_variables() - block.register_inverse() - block._input_factor.instantiate_inv_variables() - block._output_factor.instantiate_inv_variables() - - # Make sure our inverse is something other than the identity. 
- sess.run(tf_variables.global_variables_initializer()) - sess.run(block._input_factor.make_inverse_update_ops()) - sess.run(block._output_factor.make_inverse_update_ops()) - - vector = np.arange(2, 6).reshape(2, 2).astype(np.float32) - output = block.multiply_inverse(array_ops.constant(vector)) - - self.assertAllClose([[0.686291, 1.029437], [1.372583, 1.715729]], - sess.run(output)) - - def testMultiplyInverseAgainstExplicit(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - input_dim, output_dim = 3, 2 - inputs = array_ops.zeros([32, input_dim]) - outputs = array_ops.zeros([32, output_dim]) - params = array_ops.zeros([input_dim, output_dim]) - block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) - block.register_additional_tower(inputs, outputs) - grads = outputs**2 - damping = 0. # This test is only valid without damping. - block.instantiate_factors(((grads,),), damping) - block._input_factor.instantiate_cov_variables() - block._output_factor.instantiate_cov_variables() - - sess.run(state_ops.assign(block._input_factor._cov, _make_psd(3))) - sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2))) - - block.register_inverse() - block._input_factor.instantiate_inv_variables() - block._output_factor.instantiate_inv_variables() - - sess.run(block._input_factor.make_inverse_update_ops()) - sess.run(block._output_factor.make_inverse_update_ops()) - - v_flat = np.arange(6, dtype=np.float32) - vector = utils.column_to_tensors(params, array_ops.constant(v_flat)) - output = block.multiply_inverse(vector) - output_flat = sess.run(utils.tensors_to_column(output)).ravel() - - full = sess.run(block.full_fisher_block()) - explicit = np.dot(np.linalg.inv(full + damping * np.eye(6)), v_flat) - - self.assertAllClose(output_flat, explicit) - - -class ConvDiagonalFBTest(test.TestCase): - - def setUp(self): - super(ConvDiagonalFBTest, self).setUp() - - self.batch_size = 2 - self.height = 8 - 
self.width = 4 - self.input_channels = 6 - self.output_channels = 3 - self.kernel_size = 1 - - self.inputs = np.random.randn(self.batch_size, self.height, self.width, - self.input_channels).astype(np.float32) - self.outputs = np.zeros( - [self.batch_size, self.height, self.width, - self.output_channels]).astype(np.float32) - self.output_grads = np.random.randn( - self.batch_size, self.height, self.width, self.output_channels).astype( - np.float32) - self.w = np.random.randn(self.kernel_size, self.kernel_size, - self.input_channels, self.output_channels).astype( - np.float32) - self.b = np.random.randn(self.output_channels).astype(np.float32) - - def fisherApprox(self, has_bias=False): - """Fisher approximation using default inputs.""" - if has_bias: - inputs = np.concatenate( - [self.inputs, - np.ones([self.batch_size, self.height, self.width, 1])], - axis=-1) - else: - inputs = self.inputs - return self.buildDiagonalFisherApproximation(inputs, self.output_grads, - self.kernel_size) - - def buildDiagonalFisherApproximation(self, inputs, output_grads, kernel_size): - r"""Builds explicit diagonal Fisher approximation. - - Fisher's diagonal is (d loss / d w)'s elements squared for - d/dw = E[\sum_{loc} outer(input_{loc}, output_grad_{loc})] - - where the expectation is taken over examples and the sum over (x, y) - locations upon which the convolution is applied. - - Args: - inputs: np.array of shape [batch_size, height, width, input_channels]. - output_grads: np.array of shape [batch_size, height, width, - output_channels]. - kernel_size: int. height and width of kernel. - - Returns: - Diagonal np.array of shape [num_params, num_params] for num_params = - kernel_size^2 * input_channels * output_channels. 
- """ - batch_size, height, width, input_channels = inputs.shape - assert output_grads.shape[0] == batch_size - assert output_grads.shape[1] == height - assert output_grads.shape[2] == width - output_channels = output_grads.shape[3] - - # If kernel_size == 1, then we don't need to worry about capturing context - # around the pixel upon which a convolution is applied. This makes testing - # easier. - assert kernel_size == 1, "kernel_size != 1 isn't supported." - num_locations = height * width - inputs = np.reshape(inputs, [batch_size, num_locations, input_channels]) - output_grads = np.reshape(output_grads, - [batch_size, num_locations, output_channels]) - - fisher_diag = np.zeros((input_channels, output_channels)) - for i in range(batch_size): - # Each example's approximation is a square(sum-of-outer-products). - example_fisher_diag = np.zeros((input_channels, output_channels)) - for j in range(num_locations): - example_fisher_diag += np.outer(inputs[i, j], output_grads[i, j]) - fisher_diag += np.square(example_fisher_diag) - - # Normalize by batch_size (not num_locations). - return np.diag(fisher_diag.flatten()) / batch_size - - def testMultiply(self): - result, _ = self.runFisherBlockOps(self.w, [self.inputs], [self.outputs], - [self.output_grads]) - - # Construct Fisher-vector product. - expected_result = self.fisherApprox().dot(self.w.flatten()) - expected_result = expected_result.reshape([ - self.kernel_size, self.kernel_size, self.input_channels, - self.output_channels - ]) - - self.assertAllClose(expected_result, result) - - def testMultiplyInverse(self): - _, result = self.runFisherBlockOps(self.w, [self.inputs], [self.outputs], - [self.output_grads]) - - # Construct inverse Fisher-vector product. 
- expected_result = np.linalg.inv(self.fisherApprox()).dot(self.w.flatten()) - expected_result = expected_result.reshape([ - self.kernel_size, self.kernel_size, self.input_channels, - self.output_channels - ]) - - self.assertAllClose(expected_result, result, atol=1e-3) - - def testRegisterAdditionalTower(self): - """Ensure 1 big tower and 2 small towers are equivalent.""" - multiply_result_big, multiply_inverse_result_big = self.runFisherBlockOps( - self.w, [self.inputs], [self.outputs], [self.output_grads]) - multiply_result_small, multiply_inverse_result_small = ( - self.runFisherBlockOps(self.w, np.split(self.inputs, 2), - np.split(self.outputs, 2), - np.split(self.output_grads, 2))) - - self.assertAllClose(multiply_result_big, multiply_result_small) - self.assertAllClose(multiply_inverse_result_big, - multiply_inverse_result_small) - - def testMultiplyHasBias(self): - result, _ = self.runFisherBlockOps((self.w, self.b), [self.inputs], - [self.outputs], [self.output_grads]) - # Clone 'b' along 'input_channels' dimension. - b_filter = np.tile( - np.reshape(self.b, [1, 1, 1, self.output_channels]), - [self.kernel_size, self.kernel_size, 1, 1]) - params = np.concatenate([self.w, b_filter], axis=2) - expected_result = self.fisherApprox(True).dot(params.flatten()) - - # Extract 'b' from concatenated parameters. - expected_result = expected_result.reshape([ - self.kernel_size, self.kernel_size, self.input_channels + 1, - self.output_channels - ]) - expected_result = (expected_result[:, :, 0:-1, :], - np.reshape(expected_result[:, :, -1, :], - [self.output_channels])) - - self.assertEqual(len(result), 2) - self.assertAllClose(expected_result[0], result[0]) - self.assertAllClose(expected_result[1], result[1]) - - def runFisherBlockOps(self, params, inputs, outputs, output_grads): - """Run Ops guaranteed by FisherBlock interface. - - Args: - params: Tensor or 2-tuple of Tensors. Represents weights or weights and - bias of this layer. 
- inputs: list of Tensors of shape [batch_size, input_size]. Inputs to - layer. - outputs: list of Tensors of shape [batch_size, output_size]. - Preactivations produced by layer. - output_grads: list of Tensors of shape [batch_size, output_size]. - Gradient of loss with respect to 'outputs'. - - Returns: - multiply_result: Result of FisherBlock.multiply(params) - multiply_inverse_result: Result of FisherBlock.multiply_inverse(params) - """ - with ops.Graph().as_default(), self.cached_session() as sess: - inputs = as_tensors(inputs) - outputs = as_tensors(outputs) - output_grads = as_tensors(output_grads) - params = as_tensors(params) - - block = fb.ConvDiagonalFB( - lc.LayerCollection(), params, strides=[1, 1, 1, 1], padding='SAME') - for (i, o) in zip(inputs, outputs): - block.register_additional_tower(i, o) - - block.instantiate_factors((output_grads,), damping=0.0) - block._factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - sess.run(block._factor.make_covariance_update_op(0.0)) - multiply_result = sess.run(block.multiply(params)) - multiply_inverse_result = sess.run(block.multiply_inverse(params)) - - return multiply_result, multiply_inverse_result - - -class DepthwiseConvKFCBasicFBTest(test.TestCase): - - def testInstantiateFactors(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - params = random_ops.random_normal((3, 3, 8, 2)) - inputs = random_ops.random_normal((32, 5, 5, 8)) - outputs = random_ops.random_normal((32, 5, 5, 16)) - layer_collection = lc.LayerCollection() - block = fb.DepthwiseConvKFCBasicFB( - layer_collection, params=params, strides=[1, 1, 1, 1], padding='SAME') - block.register_additional_tower(inputs, outputs) - grads = outputs**2 - block.instantiate_factors(([grads],), 0.5) - - def testMultiplyInverse(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = random_ops.random_normal((3, 3, 8, 2)) - inputs = 
random_ops.random_normal((32, 5, 5, 8)) - outputs = random_ops.random_normal((32, 5, 5, 16)) - layer_collection = lc.LayerCollection() - block = fb.DepthwiseConvKFCBasicFB( - layer_collection, params=params, strides=[1, 1, 1, 1], padding='SAME') - block.register_additional_tower(inputs, outputs) - grads = outputs**2 - block.instantiate_factors(([grads],), 0.5) - block._input_factor.instantiate_cov_variables() - block._output_factor.instantiate_cov_variables() - block.register_inverse() - block._input_factor.instantiate_inv_variables() - block._output_factor.instantiate_inv_variables() - - # Ensure inverse update op doesn't crash. - sess.run(tf_variables.global_variables_initializer()) - sess.run([ - factor.make_inverse_update_ops() - for factor in layer_collection.get_factors() - ]) - - # Ensure inverse-vector multiply doesn't crash. - output = block.multiply_inverse(params) - sess.run(output) - - # Ensure same shape. - self.assertAllEqual(output.shape, params.shape) - - -class ConvKFCBasicFBTest(test.TestCase): - - def _testConvKFCBasicFBInitParams(self, params): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - if isinstance(params, (list, tuple)): - params = [array_ops.constant(param) for param in params] - else: - params = array_ops.constant(params) - inputs = random_ops.random_normal((2, 2, 2)) - outputs = random_ops.random_normal((2, 2, 2)) - block = fb.ConvKFCBasicFB( - lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_tower(inputs, outputs) - - self.assertAllEqual([outputs], block.tensors_to_compute_grads()) - - def testConvKFCBasicFBInitParamsParamsTuple(self): - self._testConvKFCBasicFBInitParams([np.ones([1, 2, 2]), np.ones([2])]) - - def testConvKFCBasicFBInitParamsParamsSingle(self): - self._testConvKFCBasicFBInitParams([np.ones([1, 2, 2])]) - - def testMultiplyInverseTuple(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = 
random_ops.random_normal((2, 2, 2, 2)) - inputs = random_ops.random_normal((2, 2, 2, 2)) - outputs = random_ops.random_normal((2, 2, 2, 2)) - block = fb.ConvKFCBasicFB( - lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_tower(inputs, outputs) - grads = outputs**2 - block.instantiate_factors(((grads,),), 0.5) - block._input_factor.instantiate_cov_variables() - block._output_factor.instantiate_cov_variables() - block.register_inverse() - block._input_factor.instantiate_inv_variables() - block._output_factor.instantiate_inv_variables() - - # Make sure our inverse is something other than the identity. - sess.run(tf_variables.global_variables_initializer()) - sess.run(block._input_factor.make_inverse_update_ops()) - sess.run(block._output_factor.make_inverse_update_ops()) - - vector = (np.arange(1, 15).reshape(7, 2).astype(np.float32), - np.arange(2, 4).reshape(2, 1).astype(np.float32)) - output = block.multiply_inverse((array_ops.constant(vector[0]), - array_ops.constant(vector[1]))) - - output = sess.run(output) - self.assertAllClose([0.136455, 0.27291], output[0][0]) - self.assertAllClose([0.27291, 0.409365], output[1]) - - def testMultiplyInverseNotTuple(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = random_ops.random_normal((2, 2, 2, 2)) - inputs = random_ops.random_normal((2, 2, 2, 2)) - outputs = random_ops.random_normal((2, 2, 2, 2)) - block = fb.ConvKFCBasicFB( - lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_tower(inputs, outputs) - self.assertFalse(block._has_bias) - grads = outputs**2 - block.instantiate_factors(((grads,),), 0.5) - block._input_factor.instantiate_cov_variables() - block._output_factor.instantiate_cov_variables() - block.register_inverse() - block._input_factor.instantiate_inv_variables() - block._output_factor.instantiate_inv_variables() - - # Make sure our inverse is something other than the identity. 
- sess.run(tf_variables.global_variables_initializer()) - sess.run(block._input_factor.make_inverse_update_ops()) - sess.run(block._output_factor.make_inverse_update_ops()) - - vector = np.arange(1, 17).reshape(8, 2).astype(np.float32) - output = block.multiply_inverse(array_ops.constant(vector)) - - self.assertAllClose([0.136455, 0.27291], sess.run(output)[0]) - - def testMultiplyInverseNotTupleWithBias(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = [random_ops.random_normal((2, 2, 2, 2))] - inputs = random_ops.random_normal((2, 2, 2, 2)) - outputs = random_ops.random_normal((2, 2, 2, 2)) - block = fb.ConvKFCBasicFB( - lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_tower(inputs, outputs) - self.assertTrue(block._has_bias) - grads = outputs**2 - block.instantiate_factors(((grads,),), 0.5) - block._input_factor.instantiate_cov_variables() - block._output_factor.instantiate_cov_variables() - block.register_inverse() - block._input_factor.instantiate_inv_variables() - block._output_factor.instantiate_inv_variables() - - # Make sure our inverse is something other than the identity. 
- sess.run(tf_variables.global_variables_initializer()) - sess.run(block._input_factor.make_inverse_update_ops()) - sess.run(block._output_factor.make_inverse_update_ops()) - - vector = np.arange(1, 19).reshape(9, 2).astype(np.float32) - output = block.multiply_inverse(array_ops.constant(vector)) - - self.assertAllClose([0.136455, 0.27291], sess.run(output)[0]) - - def testMultiplyInverseAgainstExplicit(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - params = array_ops.zeros((2, 2, 2, 2)) - inputs = array_ops.zeros((2, 2, 2, 2)) - outputs = array_ops.zeros((2, 2, 2, 2)) - block = fb.ConvKFCBasicFB( - lc.LayerCollection(), params=params, padding='SAME') - block.register_additional_tower(inputs, outputs) - grads = outputs**2 - damping = 0. # This test is only valid without damping. - block.instantiate_factors(((grads,),), damping) - block._input_factor.instantiate_cov_variables() - block._output_factor.instantiate_cov_variables() - block.register_inverse() - block._input_factor.instantiate_inv_variables() - block._output_factor.instantiate_inv_variables() - - sess.run(state_ops.assign(block._input_factor._cov, _make_psd(8))) - sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2))) - sess.run(block._input_factor.make_inverse_update_ops()) - sess.run(block._output_factor.make_inverse_update_ops()) - - v_flat = np.arange(16, dtype=np.float32) - vector = utils.column_to_tensors(params, array_ops.constant(v_flat)) - output = block.multiply_inverse(vector) - output_flat = sess.run(utils.tensors_to_column(output)).ravel() - - full = sess.run(block.full_fisher_block()) - explicit = np.dot(np.linalg.inv(full + damping * np.eye(16)), v_flat) - - self.assertAllClose(output_flat, explicit) - - -class FullyConnectedSeriesFBTest(test.TestCase): - - def testFullyConnectedSeriesFBInit(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - inputs = array_ops.constant([1., 2.]) - outputs = 
array_ops.constant([3., 4.]) - block = fb.FullyConnectedSeriesFB(lc.LayerCollection()) - block.register_additional_tower([inputs], [outputs]) - self.assertAllEqual([[outputs]], block.tensors_to_compute_grads()) - - def testInstantiateFactorsHasBias(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - inputs = array_ops.constant([[1., 2.], [3., 4.]]) - outputs = array_ops.constant([[3., 4.], [5., 6.]]) - block = fb.FullyConnectedSeriesFB( - lc.LayerCollection(), - has_bias=True) - block.register_additional_tower([inputs], [outputs]) - grads = outputs**2 - block.instantiate_factors((((grads,),),), 0.5) - - def testInstantiateFactorsNoBias(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - inputs = array_ops.constant([[1., 2.], [3., 4.]]) - outputs = array_ops.constant([[3., 4.], [5., 6.]]) - block = fb.FullyConnectedSeriesFB( - lc.LayerCollection(), - has_bias=False) - block.register_additional_tower([inputs], [outputs]) - grads = outputs**2 - block.instantiate_factors((((grads,),),), 0.5) - - -def as_tensors(tensor_or_tuple): - """Converts a potentially nested tuple of np.array to Tensors.""" - if isinstance(tensor_or_tuple, (tuple, list)): - return tuple(as_tensors(t) for t in tensor_or_tuple) - return ops.convert_to_tensor(tensor_or_tuple) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py deleted file mode 100644 index a396ca3f85..0000000000 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py +++ /dev/null @@ -1,955 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tf.contrib.kfac.fisher_factors.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import numpy.random as npr - -from tensorflow.contrib.kfac.python.ops import fisher_blocks as fb -from tensorflow.contrib.kfac.python.ops import fisher_factors as ff -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops as tf_ops -from tensorflow.python.framework import random_seed -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradients_impl -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variables as tf_variables -from tensorflow.python.platform import test - - -# We need to set these constants since the numerical values used in the tests -# were chosen when these used to be the defaults. 
-ff.set_global_constants(init_covariances_at_zero=False, - zero_debias=False, - init_inverses_at_zero=False) - - -def make_damping_func(damping): - return fb._package_func(lambda: damping, damping) - - -class FisherFactorTestingDummy(ff.FisherFactor): - """Dummy class to test the non-abstract methods on ff.FisherFactor.""" - - @property - def _var_scope(self): - return 'dummy/a_b_c' - - @property - def _cov_shape(self): - raise NotImplementedError - - @property - def _num_sources(self): - return 1 - - @property - def _dtype(self): - return dtypes.float32 - - def _compute_new_cov(self): - raise NotImplementedError - - def instantiate_covariance(self): - pass - - def make_inverse_update_ops(self): - return [] - - def get_cov(self): - return NotImplementedError - - def instantiate_inv_variables(self): - return NotImplementedError - - def _num_towers(self): - raise NotImplementedError - - def _get_data_device(self): - raise NotImplementedError - - def register_matpower(self, exp, damping_func): - raise NotImplementedError - - def register_cholesky(self, damping_func): - raise NotImplementedError - - def register_cholesky_inverse(self, damping_func): - raise NotImplementedError - - def get_matpower(self, exp, damping_func): - raise NotImplementedError - - def get_cholesky(self, damping_func): - raise NotImplementedError - - def get_cholesky_inverse(self, damping_func): - raise NotImplementedError - - def get_cov_as_linear_operator(self): - raise NotImplementedError - - -class DenseSquareMatrixFactorTestingDummy(ff.DenseSquareMatrixFactor): - """Dummy class to test the non-abstract methods on ff.DenseSquareMatrixFactor. 
- """ - - def __init__(self, shape): - self._shape = shape - super(DenseSquareMatrixFactorTestingDummy, self).__init__() - - @property - def _var_scope(self): - return 'dummy/a_b_c' - - @property - def _cov_shape(self): - return self._shape - - @property - def _num_sources(self): - return 1 - - @property - def _dtype(self): - return dtypes.float32 - - def _compute_new_cov(self): - raise NotImplementedError - - def instantiate_covariance(self): - pass - - def _num_towers(self): - raise NotImplementedError - - def _get_data_device(self): - raise NotImplementedError - - -class NumericalUtilsTest(test.TestCase): - - def testComputeCovAgainstNumpy(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - npr.seed(0) - random_seed.set_random_seed(200) - - x = npr.randn(100, 3) - cov = ff.compute_cov(array_ops.constant(x)) - np_cov = np.dot(x.T, x) / x.shape[0] - - self.assertAllClose(sess.run(cov), np_cov) - - def testComputeCovAgainstNumpyWithAlternativeNormalizer(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - npr.seed(0) - random_seed.set_random_seed(200) - - normalizer = 10. 
- x = npr.randn(100, 3) - cov = ff.compute_cov(array_ops.constant(x), normalizer=normalizer) - np_cov = np.dot(x.T, x) / normalizer - - self.assertAllClose(sess.run(cov), np_cov) - - def testAppendHomog(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - npr.seed(0) - - m, n = 3, 4 - a = npr.randn(m, n) - a_homog = ff.append_homog(array_ops.constant(a)) - np_result = np.hstack([a, np.ones((m, 1))]) - - self.assertAllClose(sess.run(a_homog), np_result) - - -class NameStringUtilFunctionTest(test.TestCase): - - def _make_tensor(self): - x = array_ops.placeholder(dtypes.float64, (3, 1)) - w = array_ops.constant(npr.RandomState(0).randn(3, 3)) - y = math_ops.matmul(w, x) - g = gradients_impl.gradients(y, x)[0] - return g - - def testScopeStringFromParamsSingleTensor(self): - with tf_ops.Graph().as_default(): - g = self._make_tensor() - scope_string = ff.scope_string_from_params(g) - self.assertEqual('gradients_MatMul_grad_MatMul_1', scope_string) - - def testScopeStringFromParamsMultipleTensors(self): - with tf_ops.Graph().as_default(): - x = array_ops.constant(1,) - y = array_ops.constant(2,) - scope_string = ff.scope_string_from_params((x, y)) - self.assertEqual('Const_Const_1', scope_string) - - def testScopeStringFromParamsMultipleTypes(self): - with tf_ops.Graph().as_default(): - x = array_ops.constant(1,) - y = array_ops.constant(2,) - scope_string = ff.scope_string_from_params([[1, 2, 3], 'foo', True, 4, - (x, y)]) - self.assertEqual('1-2-3_foo_True_4_Const__Const_1', scope_string) - - def testScopeStringFromParamsUnsupportedType(self): - with tf_ops.Graph().as_default(): - x = array_ops.constant(1,) - y = array_ops.constant(2,) - unsupported = 1.2 # Floats are not supported. 
- with self.assertRaises(ValueError): - ff.scope_string_from_params([[1, 2, 3], 'foo', True, 4, (x, y), - unsupported]) - - def testScopeStringFromName(self): - with tf_ops.Graph().as_default(): - g = self._make_tensor() - scope_string = ff.scope_string_from_name(g) - self.assertEqual('gradients_MatMul_grad_MatMul_1', scope_string) - - def testScalarOrTensorToString(self): - with tf_ops.Graph().as_default(): - self.assertEqual(ff.scalar_or_tensor_to_string(5.), repr(5.)) - - g = self._make_tensor() - scope_string = ff.scope_string_from_name(g) - self.assertEqual(ff.scalar_or_tensor_to_string(g), scope_string) - - -class FisherFactorTest(test.TestCase): - - def testMakeInverseUpdateOps(self): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - factor = FisherFactorTestingDummy() - - self.assertEqual(0, len(factor.make_inverse_update_ops())) - - -class DenseSquareMatrixFactorTest(test.TestCase): - - def testRegisterDampedInverse(self): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - shape = [2, 2] - factor = DenseSquareMatrixFactorTestingDummy(shape) - factor_var_scope = 'dummy/a_b_c' - - damping_funcs = [make_damping_func(0.1), - make_damping_func(0.1), - make_damping_func(1e-5), - make_damping_func(1e-5)] - for damping_func in damping_funcs: - factor.register_inverse(damping_func) - - factor.instantiate_inv_variables() - - inv = factor.get_inverse(damping_funcs[0]).to_dense() - self.assertEqual(inv, factor.get_inverse(damping_funcs[1]).to_dense()) - self.assertNotEqual(inv, factor.get_inverse(damping_funcs[2]).to_dense()) - self.assertEqual(factor.get_inverse(damping_funcs[2]).to_dense(), - factor.get_inverse(damping_funcs[3]).to_dense()) - factor_vars = tf_ops.get_collection(tf_ops.GraphKeys.GLOBAL_VARIABLES, - factor_var_scope) - factor_tensors = (tf_ops.convert_to_tensor(var) for var in factor_vars) - - self.assertEqual(set([inv, - factor.get_inverse(damping_funcs[2]).to_dense()]), - set(factor_tensors)) - 
self.assertEqual(shape, inv.get_shape()) - - def testRegisterMatpower(self): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - shape = [3, 3] - factor = DenseSquareMatrixFactorTestingDummy(shape) - factor_var_scope = 'dummy/a_b_c' - - # TODO(b/74201126): Change to using the same func for both once - # Topohash is in place. - damping_func_1 = make_damping_func(0.5) - damping_func_2 = make_damping_func(0.5) - - factor.register_matpower(-0.5, damping_func_1) - factor.register_matpower(2, damping_func_2) - - factor.instantiate_inv_variables() - - factor_vars = tf_ops.get_collection(tf_ops.GraphKeys.GLOBAL_VARIABLES, - factor_var_scope) - - factor_tensors = (tf_ops.convert_to_tensor(var) for var in factor_vars) - - matpower1 = factor.get_matpower(-0.5, damping_func_1).to_dense() - matpower2 = factor.get_matpower(2, damping_func_2).to_dense() - - self.assertEqual(set([matpower1, matpower2]), set(factor_tensors)) - - self.assertEqual(shape, matpower1.get_shape()) - self.assertEqual(shape, matpower2.get_shape()) - - def testMakeInverseUpdateOps(self): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - factor = FisherFactorTestingDummy() - - self.assertEqual(0, len(factor.make_inverse_update_ops())) - - def testMakeInverseUpdateOpsManyInversesEigenDecomp(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - cov = np.array([[1., 2.], [3., 4.]]) - factor = DenseSquareMatrixFactorTestingDummy(cov.shape) - factor._cov = array_ops.constant(cov, dtype=dtypes.float32) - - damping_funcs = [] - for i in range(1, ff.EIGENVALUE_DECOMPOSITION_THRESHOLD + 1): - damping_funcs.append(make_damping_func(1./i)) - - for i in range(ff.EIGENVALUE_DECOMPOSITION_THRESHOLD): - factor.register_inverse(damping_funcs[i]) - - factor.instantiate_inv_variables() - ops = factor.make_inverse_update_ops() - self.assertEqual(1, len(ops)) - - sess.run(tf_variables.global_variables_initializer()) - new_invs = 
[] - sess.run(ops) - for i in range(ff.EIGENVALUE_DECOMPOSITION_THRESHOLD): - # The inverse op will assign the damped inverse of cov to the inv var. - new_invs.append( - sess.run(factor.get_inverse(damping_funcs[i]).to_dense())) - - # We want to see that the new invs are all different from each other. - for i in range(len(new_invs)): - for j in range(i + 1, len(new_invs)): - # Just check the first element. - self.assertNotEqual(new_invs[i][0][0], new_invs[j][0][0]) - - def testMakeInverseUpdateOpsMatPowerEigenDecomp(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - cov = np.array([[6., 2.], [2., 4.]]) - factor = DenseSquareMatrixFactorTestingDummy(cov.shape) - factor._cov = array_ops.constant(cov, dtype=dtypes.float32) - exp = 2 # NOTE(mattjj): must be int to test with np.linalg.matrix_power - damping = 0.5 - damping_func = make_damping_func(damping) - - factor.register_matpower(exp, damping_func) - factor.instantiate_inv_variables() - ops = factor.make_inverse_update_ops() - self.assertEqual(1, len(ops)) - - sess.run(tf_variables.global_variables_initializer()) - sess.run(ops[0]) - matpower = sess.run(factor.get_matpower(exp, damping_func).to_dense()) - matpower_np = np.linalg.matrix_power(cov + np.eye(2) * damping, exp) - self.assertAllClose(matpower, matpower_np) - - def testMakeInverseUpdateOpsNoEigenDecomp(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - cov = np.array([[5., 2.], [2., 4.]]) # NOTE(mattjj): must be symmetric - factor = DenseSquareMatrixFactorTestingDummy(cov.shape) - factor._cov = array_ops.constant(cov, dtype=dtypes.float32) - - damping_func = make_damping_func(0) - - factor.register_inverse(damping_func) - factor.instantiate_inv_variables() - ops = factor.make_inverse_update_ops() - self.assertEqual(1, len(ops)) - - sess.run(tf_variables.global_variables_initializer()) - # The inverse op will assign the damped inverse of 
cov to the inv var. - old_inv = sess.run(factor.get_inverse(damping_func).to_dense()) - self.assertAllClose( - sess.run(ff.inverse_initializer(cov.shape, dtypes.float32)), old_inv) - - sess.run(ops) - new_inv = sess.run(factor.get_inverse(damping_func).to_dense()) - self.assertAllClose(new_inv, np.linalg.inv(cov)) - - -class FullFactorTest(test.TestCase): - - def testFullFactorInit(self): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), name='a/b/c') - factor = ff.FullFactor((tensor,), 32) - factor.instantiate_cov_variables() - self.assertEqual([6, 6], factor.get_cov().get_shape().as_list()) - - def testFullFactorInitFloat64(self): - with tf_ops.Graph().as_default(): - dtype = dtypes.float64_ref - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') - factor = ff.FullFactor((tensor,), 32) - factor.instantiate_cov_variables() - cov = factor.get_cov() - self.assertEqual(cov.dtype, dtype) - self.assertEqual([6, 6], cov.get_shape().as_list()) - - def testMakeCovarianceUpdateOp(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - tensor = array_ops.constant([1., 2.], name='a/b/c') - factor = ff.FullFactor((tensor,), 2) - factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(.5)) - self.assertAllClose([[0.75, 0.5], [0.5, 1.5]], new_cov) - - -class NaiveDiagonalFactorTest(test.TestCase): - - def testNaiveDiagonalFactorInit(self): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), name='a/b/c') - factor = ff.NaiveDiagonalFactor((tensor,), 32) - factor.instantiate_cov_variables() - self.assertEqual([6, 1], factor.get_cov().get_shape().as_list()) - - def testNaiveDiagonalFactorInitFloat64(self): - with tf_ops.Graph().as_default(): - dtype = dtypes.float64_ref - 
random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') - factor = ff.NaiveDiagonalFactor((tensor,), 32) - factor.instantiate_cov_variables() - cov = factor.get_cov() - self.assertEqual(cov.dtype, dtype) - self.assertEqual([6, 1], cov.get_shape().as_list()) - - def testMakeCovarianceUpdateOp(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - tensor = array_ops.constant([1., 2.], name='a/b/c') - factor = ff.NaiveDiagonalFactor((tensor,), 2) - factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(.5)) - self.assertAllClose([[0.75], [1.5]], new_cov) - - -class EmbeddingInputKroneckerFactorTest(test.TestCase): - - def testInitialization(self): - with tf_ops.Graph().as_default(): - input_ids = array_ops.constant([[0], [1], [4]]) - vocab_size = 5 - factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) - factor.instantiate_cov_variables() - cov = factor.get_cov() - self.assertEqual(cov.shape.as_list(), [vocab_size]) - - def testCovarianceUpdateOp(self): - with tf_ops.Graph().as_default(): - input_ids = array_ops.constant([[0], [1], [4]]) - vocab_size = 5 - factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) - factor.instantiate_cov_variables() - cov_update_op = factor.make_covariance_update_op(0.0) - - with self.cached_session() as sess: - sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(cov_update_op) - self.assertAllClose(np.array([1., 1., 0., 0., 1.]) / 3., new_cov) - - -class ConvDiagonalFactorTest(test.TestCase): - - def setUp(self): - self.batch_size = 10 - self.height = self.width = 32 - self.in_channels = 3 - self.out_channels = 1 - self.kernel_height = self.kernel_width = 3 - self.strides = [1, 2, 2, 1] - self.data_format = 'NHWC' - self.padding = 'SAME' - self.kernel_shape = [ - self.kernel_height, self.kernel_width, 
self.in_channels, - self.out_channels - ] - - def testInit(self): - with tf_ops.Graph().as_default(): - inputs = random_ops.random_uniform( - [self.batch_size, self.height, self.width, self.in_channels]) - outputs_grads = [ - random_ops.random_uniform([ - self.batch_size, self.height // self.strides[1], - self.width // self.strides[2], self.out_channels - ]) for _ in range(3) - ] - - factor = ff.ConvDiagonalFactor( - (inputs,), - (outputs_grads,), - self.kernel_shape, - self.strides, - self.padding, - data_format=self.data_format) - factor.instantiate_cov_variables() - - # Ensure covariance matrix's shape makes sense. - self.assertEqual([ - self.kernel_height * self.kernel_width * self.in_channels, - self.out_channels - ], - factor.get_cov().shape.as_list()) - - def testMakeCovarianceUpdateOp(self): - with tf_ops.Graph().as_default(): - # Construct all arguments such that convolution kernel is applied in - # exactly one spatial location. - inputs = np.random.randn( - 1, # batch_size - self.kernel_height, - self.kernel_width, - self.in_channels) # in_channels - outputs_grad = np.random.randn( - 1, # batch_size - 1, # output_height - 1, # output_width - self.out_channels) - - factor = ff.ConvDiagonalFactor( - (constant_op.constant(inputs),), - ((constant_op.constant(outputs_grad),),), - self.kernel_shape, - strides=[1, 1, 1, 1], - padding='VALID') - factor.instantiate_cov_variables() - - # Completely forget initial value on first update. - cov_update_op = factor.make_covariance_update_op(0.0) - - # Ensure new covariance value is same as outer-product of inputs/outputs - # vectorized, squared. 
- with self.cached_session() as sess: - sess.run(tf_variables.global_variables_initializer()) - cov = sess.run(cov_update_op) - expected_cov = np.outer(inputs.flatten(), outputs_grad.flatten())**2 - self.assertAllClose(expected_cov, cov) - - def testHasBias(self): - with tf_ops.Graph().as_default(): - inputs = random_ops.random_uniform( - [self.batch_size, self.height, self.width, self.in_channels]) - outputs_grads = [ - random_ops.random_uniform([ - self.batch_size, self.height // self.strides[1], - self.width // self.strides[2], self.out_channels - ]) for _ in range(3) - ] - - factor = ff.ConvDiagonalFactor( - (inputs,), - (outputs_grads,), - self.kernel_shape, - self.strides, - self.padding, - data_format=self.data_format, - has_bias=True) - factor.instantiate_cov_variables() - - # Ensure shape accounts for bias. - self.assertEqual([ - self.kernel_height * self.kernel_width * self.in_channels + 1, - self.out_channels - ], - factor.get_cov().shape.as_list()) - - # Ensure update op doesn't crash. 
- cov_update_op = factor.make_covariance_update_op(0.0) - with self.cached_session() as sess: - sess.run(tf_variables.global_variables_initializer()) - sess.run(cov_update_op) - - -class FullyConnectedKroneckerFactorTest(test.TestCase): - - def _testFullyConnectedKroneckerFactorInit(self, - has_bias, - final_shape, - dtype=dtypes.float32_ref): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') - factor = ff.FullyConnectedKroneckerFactor(((tensor,),), has_bias=has_bias) - factor.instantiate_cov_variables() - cov = factor.get_cov() - self.assertEqual(cov.dtype, dtype) - self.assertEqual(final_shape, cov.get_shape().as_list()) - - def testFullyConnectedKroneckerFactorInitNoBias(self): - for dtype in (dtypes.float32_ref, dtypes.float64_ref): - self._testFullyConnectedKroneckerFactorInit(False, [3, 3], dtype=dtype) - - def testFullyConnectedKroneckerFactorInitWithBias(self): - for dtype in (dtypes.float32_ref, dtypes.float64_ref): - self._testFullyConnectedKroneckerFactorInit(True, [4, 4], dtype=dtype) - - def testMakeCovarianceUpdateOpWithBias(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - factor = ff.FullyConnectedKroneckerFactor(((tensor,),), has_bias=True) - factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(.5)) - self.assertAllClose([[3, 3.5, 1], [3.5, 5.5, 1.5], [1, 1.5, 1]], new_cov) - - def testMakeCovarianceUpdateOpNoBias(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - factor = ff.FullyConnectedKroneckerFactor(((tensor,),)) - factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - new_cov = 
sess.run(factor.make_covariance_update_op(.5)) - self.assertAllClose([[3, 3.5], [3.5, 5.5]], new_cov) - - -class ConvFactorTestCase(test.TestCase): - - def assertMatrixRank(self, rank, matrix, atol=1e-5): - assert rank <= matrix.shape[0], 'Rank cannot be larger than matrix size.' - eigvals = np.linalg.eigvals(matrix) - nnz_eigvals = np.sum(eigvals > atol) - self.assertEqual( - rank, - nnz_eigvals, - msg=('Found %d of %d expected non-zero eigenvalues: %s.' % - (nnz_eigvals, rank, eigvals))) - - -class ConvInputKroneckerFactorTest(ConvFactorTestCase): - - def test3DConvolution(self): - with tf_ops.Graph().as_default(): - batch_size = 1 - width = 3 - in_channels = 3**3 - out_channels = 4 - - factor = ff.ConvInputKroneckerFactor( - inputs=(random_ops.random_uniform( - (batch_size, width, width, width, in_channels), seed=0),), - filter_shape=(width, width, width, in_channels, out_channels), - padding='SAME', - strides=(2, 2, 2), - extract_patches_fn='extract_convolution_patches', - has_bias=False) - factor.instantiate_cov_variables() - - # Ensure shape of covariance matches input size of filter. - input_size = in_channels * (width**3) - self.assertEqual([input_size, input_size], - factor.get_cov().shape.as_list()) - - # Ensure cov_update_op doesn't crash. - with self.cached_session() as sess: - sess.run(tf_variables.global_variables_initializer()) - sess.run(factor.make_covariance_update_op(0.0)) - cov = sess.run(factor.get_cov()) - - # Cov should be rank-8, as the filter will be applied at each corner of - # the 4-D cube. 
- self.assertMatrixRank(8, cov) - - def testPointwiseConv2d(self): - with tf_ops.Graph().as_default(): - batch_size = 1 - width = 3 - in_channels = 3**2 - out_channels = 4 - - factor = ff.ConvInputKroneckerFactor( - inputs=(random_ops.random_uniform( - (batch_size, width, width, in_channels), seed=0),), - filter_shape=(1, 1, in_channels, out_channels), - padding='SAME', - strides=(1, 1, 1, 1), - extract_patches_fn='extract_pointwise_conv2d_patches', - has_bias=False) - factor.instantiate_cov_variables() - - # Ensure shape of covariance matches input size of filter. - self.assertEqual([in_channels, in_channels], - factor.get_cov().shape.as_list()) - - # Ensure cov_update_op doesn't crash. - with self.cached_session() as sess: - sess.run(tf_variables.global_variables_initializer()) - sess.run(factor.make_covariance_update_op(0.0)) - cov = sess.run(factor.get_cov()) - - # Cov should be rank-9, as the filter will be applied at each location. - self.assertMatrixRank(9, cov) - - def testStrides(self): - with tf_ops.Graph().as_default(): - batch_size = 1 - width = 3 - in_channels = 3**2 - out_channels = 4 - - factor = ff.ConvInputKroneckerFactor( - inputs=(random_ops.random_uniform( - (batch_size, width, width, in_channels), seed=0),), - filter_shape=(1, 1, in_channels, out_channels), - padding='SAME', - strides=(1, 2, 1, 1), - extract_patches_fn='extract_image_patches', - has_bias=False) - factor.instantiate_cov_variables() - - with self.cached_session() as sess: - sess.run(tf_variables.global_variables_initializer()) - sess.run(factor.make_covariance_update_op(0.0)) - cov = sess.run(factor.get_cov()) - - # Cov should be the sum of 3 * 2 = 6 outer products. 
- self.assertMatrixRank(6, cov) - - def testDilationRate(self): - with tf_ops.Graph().as_default(): - batch_size = 1 - width = 3 - in_channels = 2 - out_channels = 4 - - factor = ff.ConvInputKroneckerFactor( - inputs=(random_ops.random_uniform( - (batch_size, width, width, in_channels), seed=0),), - filter_shape=(3, 3, in_channels, out_channels), - padding='SAME', - extract_patches_fn='extract_image_patches', - strides=(1, 1, 1, 1), - dilation_rate=(1, width, width, 1), - has_bias=False) - factor.instantiate_cov_variables() - - with self.cached_session() as sess: - sess.run(tf_variables.global_variables_initializer()) - sess.run(factor.make_covariance_update_op(0.0)) - cov = sess.run(factor.get_cov()) - - # Cov should be rank = in_channels, as only the center of the filter - # receives non-zero input for each input channel. - self.assertMatrixRank(in_channels, cov) - - def testConvInputKroneckerFactorInitNoBias(self): - with tf_ops.Graph().as_default(): - tensor = array_ops.ones((64, 1, 2, 3), name='a/b/c') - factor = ff.ConvInputKroneckerFactor( - inputs=(tensor,), - filter_shape=(1, 2, 3, 4), - padding='SAME', - has_bias=False) - factor.instantiate_cov_variables() - self.assertEqual([1 * 2 * 3, 1 * 2 * 3], - factor.get_cov().get_shape().as_list()) - - def testConvInputKroneckerFactorInit(self): - with tf_ops.Graph().as_default(): - tensor = array_ops.ones((64, 1, 2, 3), name='a/b/c') - factor = ff.ConvInputKroneckerFactor( - (tensor,), filter_shape=(1, 2, 3, 4), padding='SAME', has_bias=True) - factor.instantiate_cov_variables() - self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1], - factor.get_cov().get_shape().as_list()) - - def testConvInputKroneckerFactorInitFloat64(self): - with tf_ops.Graph().as_default(): - dtype = dtypes.float64_ref - tensor = array_ops.ones((64, 1, 2, 3), name='a/b/c', dtype=dtypes.float64) - factor = ff.ConvInputKroneckerFactor( - (tensor,), filter_shape=(1, 2, 3, 4), padding='SAME', has_bias=True) - factor.instantiate_cov_variables() - 
cov = factor.get_cov() - self.assertEqual(cov.dtype, dtype) - self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1], - cov.get_shape().as_list()) - - def testMakeCovarianceUpdateOpWithBias(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - input_shape = (2, 1, 1, 1) - tensor = array_ops.constant( - np.arange(1, 1 + np.prod(input_shape)).reshape(input_shape).astype( - np.float32)) - factor = ff.ConvInputKroneckerFactor( - (tensor,), filter_shape=(1, 1, 1, 1), padding='SAME', has_bias=True) - factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(0.)) - self.assertAllClose( - [ - [(1. + 4.) / 2., (1. + 2.) / 2.], # - [(1. + 2.) / 2., (1. + 1.) / 2.] - ], # - new_cov) - - def testMakeCovarianceUpdateOpNoBias(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - input_shape = (2, 1, 1, 1) - tensor = array_ops.constant( - np.arange(1, 1 + np.prod(input_shape)).reshape(input_shape).astype( - np.float32)) - factor = ff.ConvInputKroneckerFactor( - (tensor,), filter_shape=(1, 1, 1, 1), padding='SAME') - factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(0.)) - self.assertAllClose([[(1. + 4.) 
/ 2.]], new_cov) - - def testSubSample(self): - with tf_ops.Graph().as_default(): - patches_1 = array_ops.constant(1, shape=(10, 2)) - patches_2 = array_ops.constant(1, shape=(10, 8)) - patches_3 = array_ops.constant(1, shape=(3, 3)) - patches_1_sub = ff._subsample_for_cov_computation(patches_1) - patches_2_sub = ff._subsample_for_cov_computation(patches_2) - patches_3_sub = ff._subsample_for_cov_computation(patches_3) - patches_1_sub_batch_size = patches_1_sub.shape.as_list()[0] - patches_2_sub_batch_size = patches_2_sub.shape.as_list()[0] - patches_3_sub_batch_size = patches_3_sub.shape.as_list()[0] - self.assertEqual(2, patches_1_sub_batch_size) - self.assertEqual(8, patches_2_sub_batch_size) - self.assertEqual(3, patches_3_sub_batch_size) - - -class ConvOutputKroneckerFactorTest(ConvFactorTestCase): - - def test3DConvolution(self): - with tf_ops.Graph().as_default(): - batch_size = 1 - width = 3 - out_channels = width**3 - - factor = ff.ConvOutputKroneckerFactor(outputs_grads=([ - random_ops.random_uniform( - (batch_size, width, width, width, out_channels), seed=0) - ],)) - factor.instantiate_cov_variables() - - with self.cached_session() as sess: - sess.run(tf_variables.global_variables_initializer()) - sess.run(factor.make_covariance_update_op(0.0)) - cov = sess.run(factor.get_cov()) - - # Cov should be rank 3^3, as each spatial position donates a rank-1 - # update. 
- self.assertMatrixRank(width**3, cov) - - def testConvOutputKroneckerFactorInit(self): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3, 4, 5), name='a/b/c') - factor = ff.ConvOutputKroneckerFactor(((tensor,),)) - factor.instantiate_cov_variables() - self.assertEqual([5, 5], factor.get_cov().get_shape().as_list()) - - def testConvOutputKroneckerFactorInitFloat64(self): - with tf_ops.Graph().as_default(): - dtype = dtypes.float64_ref - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3, 4, 5), dtype=dtype, name='a/b/c') - factor = ff.ConvOutputKroneckerFactor(((tensor,),)) - factor.instantiate_cov_variables() - cov = factor.get_cov() - self.assertEqual(cov.dtype, dtype) - self.assertEqual([5, 5], cov.get_shape().as_list()) - - def testMakeCovarianceUpdateOp(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - tensor = np.arange(1, 17).reshape(2, 2, 2, 2).astype(np.float32) - factor = ff.ConvOutputKroneckerFactor(((array_ops.constant(tensor),),)) - factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(.5)) - self.assertAllClose([[43, 46.5], [46.5, 51.5]], new_cov) - - -class FullyConnectedMultiKFTest(test.TestCase): - - def testFullyConnectedMultiKFInit(self): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), name='a/b/c') - factor = ff.FullyConnectedMultiKF(((tensor,),), has_bias=False) - factor.instantiate_cov_variables() - self.assertEqual([3, 3], factor.get_cov().get_shape().as_list()) - - def testFullyConnectedMultiKFInitFloat64(self): - with tf_ops.Graph().as_default(): - dtype = dtypes.float64_ref - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') - factor = ff.FullyConnectedMultiKF(((tensor,),), has_bias=False) - 
factor.instantiate_cov_variables() - cov = factor.get_cov() - self.assertEqual(cov.dtype, dtype) - self.assertEqual([3, 3], cov.get_shape().as_list()) - - def testMakeCovarianceUpdateOpWithBias(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - factor = ff.FullyConnectedMultiKF(((tensor,),), has_bias=True) - factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(.5)) - self.assertAllClose([[3, 3.5, 1], [3.5, 5.5, 1.5], [1, 1.5, 1]], new_cov) - - def testMakeCovarianceUpdateOpNoBias(self): - with tf_ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') - factor = ff.FullyConnectedMultiKF(((tensor,),)) - factor.instantiate_cov_variables() - - sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(.5)) - self.assertAllClose([[3, 3.5], [3.5, 5.5]], new_cov) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py deleted file mode 100644 index 586fcd4c3c..0000000000 --- a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py +++ /dev/null @@ -1,597 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tf.contrib.kfac.layer_collection.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.kfac.python.ops import fisher_blocks -from tensorflow.contrib.kfac.python.ops import fisher_factors -from tensorflow.contrib.kfac.python.ops import layer_collection -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.platform import test - - -class MockFisherBlock(object): - """A fake FisherBlock.""" - - num_registered_towers = 2 - - def __init__(self, name='MockFisherBlock'): - self.name = name - - def __eq__(self, other): - return isinstance(other, MockFisherBlock) and other.name == self.name - - def __hash__(self): - return hash(self.name) - - -class LayerParametersDictTest(test.TestCase): - - def testSetItem(self): - """Ensure insertion, contains, retrieval works for supported key types.""" - with ops.Graph().as_default(): - lp_dict = layer_collection.LayerParametersDict() - - x = array_ops.constant(0) - y0 = array_ops.constant(0) - y1 = array_ops.constant(0) - z0 = array_ops.constant(0) - z1 = array_ops.constant(0) - keys = [x, (y0, y1), 
[z0, z1]] - for key in keys: - lp_dict[key] = key - - for key in keys: - self.assertTrue(key in lp_dict) - self.assertEqual(lp_dict[key], key) - - def testSetItemOverlap(self): - """Ensure insertion fails if key overlaps with existing key.""" - with ops.Graph().as_default(): - lp_dict = layer_collection.LayerParametersDict() - - x = array_ops.constant(0) - y = array_ops.constant(0) - lp_dict[x] = 'value' - - with self.assertRaises(ValueError): - lp_dict[(x, y)] = 'value' - - # Ensure 'y' wasn't inserted. - self.assertTrue(x in lp_dict) - self.assertFalse(y in lp_dict) - - -class LayerCollectionTest(test.TestCase): - - def testLayerCollectionInit(self): - lc = layer_collection.LayerCollection() - self.assertEqual(0, len(lc.get_blocks())) - self.assertEqual(0, len(lc.get_factors())) - self.assertFalse(lc.losses) - - def testRegisterBlocks(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - lc = layer_collection.LayerCollection() - lc.register_fully_connected( - array_ops.constant(1), array_ops.constant(2), array_ops.constant(3)) - lc.register_fully_connected( - array_ops.constant(1), - array_ops.constant(2), - array_ops.constant(3), - approx=layer_collection.APPROX_DIAGONAL_NAME) - lc.register_conv2d( - params=array_ops.ones((2, 3, 4, 5)), - strides=[1, 1, 1, 1], - padding='SAME', - inputs=array_ops.ones((1, 2, 3, 4)), - outputs=array_ops.ones((1, 1, 1, 5))) - lc.register_conv2d( - params=array_ops.ones((2, 3, 4, 5)), - strides=[1, 1, 1, 1], - padding='SAME', - inputs=array_ops.ones((1, 2, 3, 4)), - outputs=array_ops.ones((1, 1, 1, 5)), - approx=layer_collection.APPROX_DIAGONAL_NAME) - lc.register_separable_conv2d( - depthwise_params=array_ops.ones((3, 3, 1, 2)), - pointwise_params=array_ops.ones((1, 1, 2, 4)), - inputs=array_ops.ones((32, 5, 5, 1)), - depthwise_outputs=array_ops.ones((32, 5, 5, 2)), - pointwise_outputs=array_ops.ones((32, 5, 5, 4)), - strides=[1, 1, 1, 1], - padding='SAME') - lc.register_convolution( - 
params=array_ops.ones((3, 3, 1, 8)), - inputs=array_ops.ones((32, 5, 5, 1)), - outputs=array_ops.ones((32, 5, 5, 8)), - padding='SAME') - lc.register_generic( - array_ops.constant(5), 16, approx=layer_collection.APPROX_FULL_NAME) - lc.register_generic( - array_ops.constant(6), - 16, - approx=layer_collection.APPROX_DIAGONAL_NAME) - lc.register_fully_connected_multi( - array_ops.constant(1), - (array_ops.constant(2), array_ops.constant(3)), - (array_ops.constant(4), array_ops.constant(5))) - lc.register_conv2d_multi( - params=array_ops.ones((2, 3, 4, 5)), - strides=[1, 1, 1, 1], - padding='SAME', - inputs=(array_ops.ones((1, 2, 3, 4)), array_ops.ones((5, 6, 7, 8))), - outputs=(array_ops.ones((1, 1, 1, 5)), array_ops.ones((2, 2, 2, 10)))) - lc.register_embedding_multi( - array_ops.constant((1,)), - (array_ops.constant(2), array_ops.constant(3)), - (array_ops.constant(4), array_ops.constant(5))) - - self.assertEqual(12, len(lc.get_blocks())) - - def testRegisterBlocksMultipleRegistrations(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - lc = layer_collection.LayerCollection() - key = array_ops.constant(1) - lc.register_fully_connected(key, array_ops.constant(2), - array_ops.constant(3)) - with self.assertRaises(ValueError) as cm: - lc.register_generic(key, 16) - self.assertIn('already in LayerCollection', str(cm.exception)) - - def testRegisterSingleParamNotRegistered(self): - x = variable_scope.get_variable('x', initializer=array_ops.constant(1,)) - lc = layer_collection.LayerCollection() - lc.fisher_blocks = { - variable_scope.get_variable('y', initializer=array_ops.constant(1,)): - '1' - } - lc.register_block(x, 'foo') - - def testShouldRegisterSingleParamRegistered(self): - x = variable_scope.get_variable('x', initializer=array_ops.constant(1,)) - lc = layer_collection.LayerCollection() - lc.fisher_blocks = {x: '1'} - with self.assertRaises(ValueError) as cm: - lc.register_block(x, 'foo') - self.assertIn('already in LayerCollection', 
str(cm.exception)) - - def testRegisterSingleParamRegisteredInTuple(self): - x = variable_scope.get_variable('x', initializer=array_ops.constant(1,)) - y = variable_scope.get_variable('y', initializer=array_ops.constant(1,)) - lc = layer_collection.LayerCollection() - lc.fisher_blocks = {(x, y): '1'} - with self.assertRaises(ValueError) as cm: - lc.register_block(x, 'foo') - self.assertIn('was already registered', str(cm.exception)) - - def testRegisterTupleParamNotRegistered(self): - x = variable_scope.get_variable('x', initializer=array_ops.constant(1,)) - y = variable_scope.get_variable('y', initializer=array_ops.constant(1,)) - lc = layer_collection.LayerCollection() - lc.fisher_blocks = { - variable_scope.get_variable('z', initializer=array_ops.constant(1,)): - '1' - } - - lc.register_block((x, y), 'foo') - self.assertEqual(set(['1', 'foo']), set(lc.get_blocks())) - - def testRegisterTupleParamRegistered(self): - x = variable_scope.get_variable('x', initializer=array_ops.constant(1,)) - y = variable_scope.get_variable('y', initializer=array_ops.constant(1,)) - lc = layer_collection.LayerCollection() - lc.fisher_blocks = {(x, y): '1'} - - with self.assertRaises(ValueError) as cm: - lc.register_block((x, y), 'foo') - self.assertIn('already in LayerCollection', str(cm.exception)) - - def testRegisterTupleParamRegisteredInSuperset(self): - x = variable_scope.get_variable('x', initializer=array_ops.constant(1,)) - y = variable_scope.get_variable('y', initializer=array_ops.constant(1,)) - z = variable_scope.get_variable('z', initializer=array_ops.constant(1,)) - lc = layer_collection.LayerCollection() - lc.fisher_blocks = {(x, y, z): '1'} - - with self.assertRaises(ValueError) as cm: - lc.register_block((x, y), 'foo') - self.assertIn('was already registered', str(cm.exception)) - - def testRegisterTupleParamSomeRegistered(self): - x = variable_scope.get_variable('x', initializer=array_ops.constant(1,)) - y = variable_scope.get_variable('y', 
initializer=array_ops.constant(1,)) - z = variable_scope.get_variable('z', initializer=array_ops.constant(1,)) - lc = layer_collection.LayerCollection() - lc.fisher_blocks = {x: MockFisherBlock('1'), z: MockFisherBlock('2')} - - with self.assertRaises(ValueError) as cm: - lc.register_block((x, y), MockFisherBlock('foo')) - self.assertIn('was already registered', str(cm.exception)) - - def testRegisterTupleVarSomeRegisteredInOtherTuples(self): - x = variable_scope.get_variable('x', initializer=array_ops.constant(1,)) - y = variable_scope.get_variable('y', initializer=array_ops.constant(1,)) - z = variable_scope.get_variable('z', initializer=array_ops.constant(1,)) - w = variable_scope.get_variable('w', initializer=array_ops.constant(1,)) - lc = layer_collection.LayerCollection() - lc.fisher_blocks = {(x, z): '1', (z, w): '2'} - - with self.assertRaises(ValueError) as cm: - lc.register_block((x, y), 'foo') - self.assertIn('was already registered', str(cm.exception)) - - def testRegisterCategoricalPredictiveDistribution(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - logits = linalg_ops.eye(2) - - lc = layer_collection.LayerCollection() - lc.register_categorical_predictive_distribution(logits, seed=200) - single_loss = sess.run(lc.total_sampled_loss()) - - lc2 = layer_collection.LayerCollection() - lc2.register_categorical_predictive_distribution(logits, seed=200) - lc2.register_categorical_predictive_distribution(logits, seed=200) - double_loss = sess.run(lc2.total_sampled_loss()) - self.assertAlmostEqual(2 * single_loss, double_loss) - - def testLossFunctionByName(self): - """Ensure loss functions can be identified by name.""" - with ops.Graph().as_default(): - logits = linalg_ops.eye(2) - lc = layer_collection.LayerCollection() - - # Create a new loss function by name. 
- lc.register_categorical_predictive_distribution(logits, name='loss1') - self.assertEqual(1, len(lc.towers_by_loss)) - - # Add logits to same loss function. - lc.register_categorical_predictive_distribution( - logits, name='loss1', reuse=True) - self.assertEqual(1, len(lc.towers_by_loss)) - - # Add another new loss function. - lc.register_categorical_predictive_distribution(logits, name='loss2') - self.assertEqual(2, len(lc.towers_by_loss)) - - def testLossFunctionWithoutName(self): - """Ensure loss functions get unique names if 'name' not specified.""" - with ops.Graph().as_default(): - logits = linalg_ops.eye(2) - lc = layer_collection.LayerCollection() - - # Create a new loss function with default names. - lc.register_categorical_predictive_distribution(logits) - lc.register_categorical_predictive_distribution(logits) - self.assertEqual(2, len(lc.losses)) - - def testCategoricalPredictiveDistributionMultipleMinibatches(self): - """Ensure multiple minibatches are registered.""" - with ops.Graph().as_default(): - batch_size = 3 - output_size = 2 - logits = array_ops.zeros([batch_size, output_size]) - targets = array_ops.ones([batch_size], dtype=dtypes.int32) - lc = layer_collection.LayerCollection() - - # Create a new loss function. - lc.register_categorical_predictive_distribution( - logits, targets=targets, name='loss1') - - # Can add when reuse=True - lc.register_categorical_predictive_distribution( - logits, targets=targets, name='loss1', reuse=True) - - # Can add when reuse=VARIABLE_SCOPE and reuse=True there. 
- with variable_scope.variable_scope( - variable_scope.get_variable_scope(), reuse=True): - lc.register_categorical_predictive_distribution( - logits, - targets=targets, - name='loss1', - reuse=layer_collection.VARIABLE_SCOPE) - - # Can't add when reuse=False - with self.assertRaises(KeyError): - lc.register_categorical_predictive_distribution( - logits, targets=targets, name='loss1', reuse=False) - - # Can't add when reuse=VARIABLE_SCOPE and reuse=False there. - with self.assertRaises(KeyError): - lc.register_categorical_predictive_distribution( - logits, - targets=targets, - name='loss1', - reuse=layer_collection.VARIABLE_SCOPE) - - self.assertEqual(len(lc.towers_by_loss), 1) - # Three successful registrations. - self.assertEqual(len(lc.towers_by_loss[0]), 3) - - def testRegisterCategoricalPredictiveDistributionBatchSize1(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - logits = random_ops.random_normal((1, 2)) - lc = layer_collection.LayerCollection() - - lc.register_categorical_predictive_distribution(logits, seed=200) - - def testRegisterCategoricalPredictiveDistributionSpecifiedTargets(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - logits = array_ops.constant([[1., 2.], [3., 4.]], dtype=dtypes.float32) - lc = layer_collection.LayerCollection() - targets = array_ops.constant([0, 1], dtype=dtypes.int32) - - lc.register_categorical_predictive_distribution(logits, targets=targets) - single_loss = sess.run(lc.total_loss()) - self.assertAlmostEqual(1.6265233, single_loss) - - def testRegisterNormalPredictiveDistribution(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - predictions = array_ops.constant( - [[1., 2.], [3., 4]], dtype=dtypes.float32) - - lc = layer_collection.LayerCollection() - lc.register_normal_predictive_distribution(predictions, 1., seed=200) - single_loss = sess.run(lc.total_sampled_loss()) - - lc2 = 
layer_collection.LayerCollection() - lc2.register_normal_predictive_distribution(predictions, 1., seed=200) - lc2.register_normal_predictive_distribution(predictions, 1., seed=200) - double_loss = sess.run(lc2.total_sampled_loss()) - - self.assertAlmostEqual(2 * single_loss, double_loss) - - def testRegisterNormalPredictiveDistributionSpecifiedTargets(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - predictions = array_ops.constant( - [[1., 2.], [3., 4.]], dtype=dtypes.float32) - lc = layer_collection.LayerCollection() - targets = array_ops.constant([[3., 1.], [4., 2.]], dtype=dtypes.float32) - - lc.register_normal_predictive_distribution( - predictions, 2.**2, targets=targets) - single_loss = sess.run(lc.total_loss()) - self.assertAlmostEqual(7.6983433, single_loss) - - def ensureLayerReuseWorks(self, register_fn): - """Ensure the 'reuse' keyword argument function as intended. - - Args: - register_fn: function for registering a layer. Arguments are - layer_collection, reuse, and approx. - """ - # Fails on second if reuse=False. - lc = layer_collection.LayerCollection() - register_fn(lc) - with self.assertRaises(ValueError): - register_fn(lc, reuse=False) - - # Succeeds on second if reuse=True. - lc = layer_collection.LayerCollection() - register_fn(lc) - register_fn(lc, reuse=True) - - # Fails on second if reuse=VARIABLE_SCOPE and no variable reuse. - lc = layer_collection.LayerCollection() - register_fn(lc) - with self.assertRaises(ValueError): - register_fn(lc, reuse=layer_collection.VARIABLE_SCOPE) - - # Succeeds on second if reuse=VARIABLE_SCOPE and variable reuse. - lc = layer_collection.LayerCollection() - register_fn(lc) - with variable_scope.variable_scope( - variable_scope.get_variable_scope(), reuse=True): - register_fn(lc, reuse=layer_collection.VARIABLE_SCOPE) - - # Fails if block type changes. 
- lc = layer_collection.LayerCollection() - register_fn(lc, approx=layer_collection.APPROX_KRONECKER_NAME) - with self.assertRaises(ValueError): - register_fn(lc, approx=layer_collection.APPROX_DIAGONAL_NAME, reuse=True) - - # Fails if reuse requested but no FisherBlock exists. - lc = layer_collection.LayerCollection() - with self.assertRaises(KeyError): - register_fn(lc, reuse=True) - - def testRegisterFullyConnectedReuse(self): - """Ensure the 'reuse' works with register_fully_connected.""" - with ops.Graph().as_default(): - inputs = array_ops.ones([2, 10]) - outputs = array_ops.zeros([2, 5]) - params = ( - variable_scope.get_variable('w', [10, 5]), # - variable_scope.get_variable('b', [5])) - - def register_fn(lc, **kwargs): - lc.register_fully_connected( - params=params, inputs=inputs, outputs=outputs, **kwargs) - - self.ensureLayerReuseWorks(register_fn) - - def testRegisterConv2dReuse(self): - """Ensure the 'reuse' works with register_conv2d.""" - with ops.Graph().as_default(): - inputs = array_ops.ones([2, 5, 5, 10]) - outputs = array_ops.zeros([2, 5, 5, 3]) - params = ( - variable_scope.get_variable('w', [1, 1, 10, 3]), # - variable_scope.get_variable('b', [3])) - - def register_fn(lc, **kwargs): - lc.register_conv2d( - params=params, - strides=[1, 1, 1, 1], - padding='SAME', - inputs=inputs, - outputs=outputs, - **kwargs) - - self.ensureLayerReuseWorks(register_fn) - - def testReuseWithInvalidRegistration(self): - """Invalid registrations shouldn't overwrite existing blocks.""" - with ops.Graph().as_default(): - inputs = array_ops.ones([2, 5, 5, 10]) - outputs = array_ops.zeros([2, 5, 5, 3]) - w = variable_scope.get_variable('w', [1, 1, 10, 3]) - b = variable_scope.get_variable('b', [3]) - lc = layer_collection.LayerCollection() - lc.register_fully_connected(w, inputs, outputs) - self.assertEqual(lc.fisher_blocks[w].num_registered_towers, 1) - with self.assertRaises(KeyError): - lc.register_fully_connected((w, b), inputs, outputs, reuse=True) - 
self.assertNotIn((w, b), lc.fisher_blocks) - self.assertEqual(lc.fisher_blocks[w].num_registered_towers, 1) - lc.register_fully_connected(w, inputs, outputs, reuse=True) - self.assertEqual(lc.fisher_blocks[w].num_registered_towers, 2) - - def testMakeOrGetFactor(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - lc = layer_collection.LayerCollection() - key = array_ops.constant(1) - lc.make_or_get_factor(fisher_factors.FullFactor, ((key,), 16)) - lc.make_or_get_factor(fisher_factors.FullFactor, ((key,), 16)) - lc.make_or_get_factor(fisher_factors.FullFactor, - ((array_ops.constant(2),), 16)) - - self.assertEqual(2, len(lc.get_factors())) - variables = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertTrue( - all([var.name.startswith('LayerCollection') for var in variables])) - - def testMakeOrGetFactorCustomScope(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - scope = 'Foo' - lc = layer_collection.LayerCollection(name=scope) - key = array_ops.constant(1) - lc.make_or_get_factor(fisher_factors.FullFactor, ((key,), 16)) - lc.make_or_get_factor(fisher_factors.FullFactor, ((key,), 16)) - lc.make_or_get_factor(fisher_factors.FullFactor, - ((array_ops.constant(2),), 16)) - - self.assertEqual(2, len(lc.get_factors())) - variables = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertTrue(all([var.name.startswith(scope) for var in variables])) - - def testIdentifyLinkedParametersSomeRegisteredInOtherTuples(self): - x = variable_scope.get_variable('x', shape=()) - y = variable_scope.get_variable('y', shape=()) - z = variable_scope.get_variable('z', shape=()) - lc = layer_collection.LayerCollection() - lc.define_linked_parameters((x, y)) - - with self.assertRaises(ValueError): - lc.define_linked_parameters((x, z)) - - def testIdentifySubsetPreviouslyRegisteredTensor(self): - x = variable_scope.get_variable('x', shape=()) - y = variable_scope.get_variable('y', shape=()) - lc = 
layer_collection.LayerCollection() - lc.define_linked_parameters((x, y)) - - with self.assertRaises(ValueError): - lc.define_linked_parameters(x) - - def testSpecifyApproximation(self): - w_0 = variable_scope.get_variable('w_0', [10, 10]) - w_1 = variable_scope.get_variable('w_1', [10, 10]) - - b_0 = variable_scope.get_variable('b_0', [10]) - b_1 = variable_scope.get_variable('b_1', [10]) - - x_0 = array_ops.placeholder(dtypes.float32, shape=(32, 10)) - x_1 = array_ops.placeholder(dtypes.float32, shape=(32, 10)) - - pre_bias_0 = math_ops.matmul(x_0, w_0) - pre_bias_1 = math_ops.matmul(x_1, w_1) - - # Build the fully connected layers in the graph. - pre_bias_0 + b_0 # pylint: disable=pointless-statement - pre_bias_1 + b_1 # pylint: disable=pointless-statement - - lc = layer_collection.LayerCollection() - lc.define_linked_parameters( - w_0, approximation=layer_collection.APPROX_DIAGONAL_NAME) - lc.define_linked_parameters( - w_1, approximation=layer_collection.APPROX_DIAGONAL_NAME) - lc.define_linked_parameters( - b_0, approximation=layer_collection.APPROX_FULL_NAME) - lc.define_linked_parameters( - b_1, approximation=layer_collection.APPROX_FULL_NAME) - - lc.register_fully_connected(w_0, x_0, pre_bias_0) - lc.register_fully_connected( - w_1, x_1, pre_bias_1, approx=layer_collection.APPROX_KRONECKER_NAME) - self.assertIsInstance(lc.fisher_blocks[w_0], - fisher_blocks.FullyConnectedDiagonalFB) - self.assertIsInstance(lc.fisher_blocks[w_1], - fisher_blocks.FullyConnectedKFACBasicFB) - - lc.register_generic(b_0, batch_size=1) - lc.register_generic( - b_1, batch_size=1, approx=layer_collection.APPROX_DIAGONAL_NAME) - self.assertIsInstance(lc.fisher_blocks[b_0], fisher_blocks.FullFB) - self.assertIsInstance(lc.fisher_blocks[b_1], fisher_blocks.NaiveDiagonalFB) - - def testDefaultLayerCollection(self): - with ops.Graph().as_default(): - # Can't get default if there isn't one set. 
- with self.assertRaises(ValueError): - layer_collection.get_default_layer_collection() - - # Can't set default twice. - lc = layer_collection.LayerCollection() - layer_collection.set_default_layer_collection(lc) - with self.assertRaises(ValueError): - layer_collection.set_default_layer_collection(lc) - - # Same as one set. - self.assertTrue(lc is layer_collection.get_default_layer_collection()) - - # Can set to None. - layer_collection.set_default_layer_collection(None) - with self.assertRaises(ValueError): - layer_collection.get_default_layer_collection() - - # as_default() is the same as setting/clearing. - with lc.as_default(): - self.assertTrue(lc is layer_collection.get_default_layer_collection()) - with self.assertRaises(ValueError): - layer_collection.get_default_layer_collection() - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py b/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py deleted file mode 100644 index f424e02360..0000000000 --- a/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for tf.contrib.kfac.loss_functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.kfac.python.ops import loss_functions -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.platform import test - - -class InsertSliceInZerosTest(test.TestCase): - - def testBadShape(self): - bad_shaped_ones = array_ops.ones(shape=[1, 3]) # n.b. shape[1] != 1 - with self.assertRaises(ValueError): - loss_functions.insert_slice_in_zeros(bad_shaped_ones, 1, 42, 17) - - def test3d(self): - input_tensor = constant_op.constant([[[1, 2]], [[3, 4]]]) - expected_output_array = [[[1, 2], [0, 0]], [[3, 4], [0, 0]]] - op = loss_functions.insert_slice_in_zeros(input_tensor, 1, 2, 0) - with self.cached_session() as sess: - actual_output_array = sess.run(op) - self.assertAllEqual(expected_output_array, actual_output_array) - - -class CategoricalLogitsNegativeLogProbLossTest(test.TestCase): - - def testSample(self): - """Ensure samples can be drawn.""" - with ops.Graph().as_default(), self.cached_session() as sess: - logits = np.asarray([ - [0., 0., 0.], # - [1., -1., 0.] - ]).astype(np.float32) - loss = loss_functions.CategoricalLogitsNegativeLogProbLoss( - array_ops.constant(logits)) - sample = loss.sample(42) - sample = sess.run(sample) - self.assertEqual(sample.shape, (2,)) - - def testEvaluateOnTargets(self): - """Ensure log probability can be evaluated correctly.""" - with ops.Graph().as_default(), self.cached_session() as sess: - logits = np.asarray([ - [0., 0., 0.], # - [1., -1., 0.] 
- ]).astype(np.float32) - targets = np.asarray([2, 1]).astype(np.int32) - loss = loss_functions.CategoricalLogitsNegativeLogProbLoss( - array_ops.constant(logits), targets=array_ops.constant(targets)) - neg_log_prob = loss.evaluate() - neg_log_prob = sess.run(neg_log_prob) - - # Calculate explicit log probability of targets. - probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True) - log_probs = np.log([ - probs[0, targets[0]], # - probs[1, targets[1]] - ]) - expected_log_prob = np.sum(log_probs) - - self.assertAllClose(neg_log_prob, -expected_log_prob) - - def testEvaluateOnSample(self): - """Ensure log probability of a sample can be drawn.""" - with ops.Graph().as_default(), self.cached_session() as sess: - logits = np.asarray([ - [0., 0., 0.], # - [1., -1., 0.] - ]).astype(np.float32) - loss = loss_functions.CategoricalLogitsNegativeLogProbLoss( - array_ops.constant(logits)) - neg_log_prob = loss.evaluate_on_sample(42) - - # Simply ensure this doesn't crash. As the output is random, it's - # difficult to say if the output is correct or not... - neg_log_prob = sess.run(neg_log_prob) - - def testMultiplyFisherSingleVector(self): - with ops.Graph().as_default(), self.cached_session() as sess: - logits = np.array([1., 2., 3.]) - loss = loss_functions.CategoricalLogitsNegativeLogProbLoss(logits) - - # the LossFunction.multiply_fisher docstring only says it supports the - # case where the vector is the same shape as the input natural parameters - # (i.e. 
the logits here), but here we also test leading dimensions - vector = np.array([1., 2., 3.]) - vectors = [vector, vector.reshape(1, -1), np.stack([vector] * 4)] - - probs = np.exp(logits - np.logaddexp.reduce(logits)) - fisher = np.diag(probs) - np.outer(probs, probs) - - for vector in vectors: - result = loss.multiply_fisher(vector) - expected_result = np.dot(vector, fisher) - self.assertAllClose(expected_result, sess.run(result)) - - def testMultiplyFisherBatch(self): - with ops.Graph().as_default(), self.cached_session() as sess: - logits = np.array([[1., 2., 3.], [4., 6., 8.]]) - loss = loss_functions.CategoricalLogitsNegativeLogProbLoss(logits) - - vector = np.array([[1., 2., 3.], [5., 3., 1.]]) - - na = np.newaxis - probs = np.exp(logits - np.logaddexp.reduce(logits, axis=-1, - keepdims=True)) - fishers = probs[..., na] * np.eye(3) - probs[..., na] * probs[..., na, :] - - result = loss.multiply_fisher(vector) - expected_result = np.matmul(vector[..., na, :], fishers)[..., 0, :] - self.assertEqual(sess.run(result).shape, logits.shape) - self.assertAllClose(expected_result, sess.run(result)) - - -class OnehotCategoricalLogitsNegativeLogProbLossTest(test.TestCase): - - def testSample(self): - """Ensure samples can be drawn.""" - with ops.Graph().as_default(), self.cached_session() as sess: - logits = np.asarray([ - [0., 0., 0.], # - [1., -1., 0.] - ]).astype(np.float32) - loss = loss_functions.OnehotCategoricalLogitsNegativeLogProbLoss( - array_ops.constant(logits)) - sample = loss.sample(42) - sample = sess.run(sample) - self.assertEqual(sample.shape, (2, 3)) - - def testEvaluateOnTargets(self): - """Ensure log probability can be evaluated correctly.""" - with ops.Graph().as_default(), self.cached_session() as sess: - logits = np.asarray([ - [0., 0., 0.], # - [1., -1., 0.] 
- ]).astype(np.float32) - targets = np.asarray([2, 1]).astype(np.int32) - loss = loss_functions.OnehotCategoricalLogitsNegativeLogProbLoss( - array_ops.constant(logits), targets=array_ops.one_hot(targets, 3)) - neg_log_prob = loss.evaluate() - neg_log_prob = sess.run(neg_log_prob) - - # Calculate explicit log probability of targets. - probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True) - log_probs = np.log([ - probs[0, targets[0]], # - probs[1, targets[1]] - ]) - expected_log_prob = np.sum(log_probs) - - self.assertAllClose(neg_log_prob, -expected_log_prob) - - def testEvaluateOnSample(self): - """Ensure log probability of a sample can be drawn.""" - with ops.Graph().as_default(), self.cached_session() as sess: - logits = np.asarray([ - [0., 0., 0.], # - [1., -1., 0.] - ]).astype(np.float32) - loss = loss_functions.OnehotCategoricalLogitsNegativeLogProbLoss( - array_ops.constant(logits)) - neg_log_prob = loss.evaluate_on_sample(42) - - # Simply ensure this doesn't crash. As the output is random, it's - # difficult to say if the output is correct or not... - neg_log_prob = sess.run(neg_log_prob) - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/kfac/python/kernel_tests/op_queue_test.py b/tensorflow/contrib/kfac/python/kernel_tests/op_queue_test.py deleted file mode 100644 index 4fae4374e1..0000000000 --- a/tensorflow/contrib/kfac/python/kernel_tests/op_queue_test.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tf.contrib.kfac.op_queue.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.kfac.python.ops import op_queue -from tensorflow.python.framework import ops as tf_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.platform import test - - -class OpQueueTest(test.TestCase): - - def testNextOp(self): - """Ensures all ops get selected eventually.""" - with tf_ops.Graph().as_default(): - ops = [ - math_ops.add(1, 2), - math_ops.subtract(1, 2), - math_ops.reduce_mean([1, 2]), - ] - queue = op_queue.OpQueue(ops, seed=0) - - with self.cached_session() as sess: - # Ensure every inv update op gets selected. - selected_ops = set([queue.next_op(sess) for _ in ops]) - self.assertEqual(set(ops), set(selected_ops)) - - # Ensure additional calls don't create any new ops. - selected_ops.add(queue.next_op(sess)) - self.assertEqual(set(ops), set(selected_ops)) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/kfac/python/kernel_tests/optimizer_test.py b/tensorflow/contrib/kfac/python/kernel_tests/optimizer_test.py deleted file mode 100644 index 0b0de12ce6..0000000000 --- a/tensorflow/contrib/kfac/python/kernel_tests/optimizer_test.py +++ /dev/null @@ -1,219 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tf.contrib.kfac.optimizer.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.kfac.python.ops import fisher_factors as ff -from tensorflow.contrib.kfac.python.ops import layer_collection as lc -from tensorflow.contrib.kfac.python.ops import optimizer -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables as tf_variables -from tensorflow.python.platform import test - - -# We need to set these constants since the numerical values used in the tests -# were chosen when these used to be the defaults. 
-ff.set_global_constants(init_covariances_at_zero=False, - zero_debias=False, - init_inverses_at_zero=False) - - -def dummy_layer_collection(): - lcoll = lc.LayerCollection() - dummy = array_ops.constant([1., 2.]) - lcoll.register_categorical_predictive_distribution(logits=dummy) - return lcoll - - -class OptimizerTest(test.TestCase): - - def testOptimizerInitInvalidMomentumRegistration(self): - with self.assertRaises(ValueError): - optimizer.KfacOptimizer( - 0.1, 0.2, 0.3, lc.LayerCollection(), momentum_type='foo') - - def testOptimizerInit(self): - with ops.Graph().as_default(): - layer_collection = lc.LayerCollection() - - inputs = array_ops.ones((2, 1)) * 2 - weights_val = np.ones((1, 1), dtype=np.float32) * 3. - weights = variable_scope.get_variable( - 'w', initializer=array_ops.constant(weights_val)) - bias = variable_scope.get_variable( - 'b', initializer=init_ops.zeros_initializer(), shape=(1, 1)) - output = math_ops.matmul(inputs, weights) + bias - - layer_collection.register_fully_connected((weights, bias), inputs, output) - - logits = math_ops.tanh(output) - targets = array_ops.constant([[0.], [1.]]) - output = math_ops.reduce_mean( - nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets)) - - layer_collection.register_categorical_predictive_distribution(logits) - - optimizer.KfacOptimizer( - 0.1, - 0.2, - 0.3, - layer_collection, - momentum=0.5, - momentum_type='regular') - - def testSquaredFisherNorm(self): - with ops.Graph().as_default(), self.cached_session() as sess: - grads_and_vars = [(array_ops.constant([[1., 2.], [3., 4.]]), None), - (array_ops.constant([[2., 3.], [4., 5.]]), None)] - pgrads_and_vars = [(array_ops.constant([[3., 4.], [5., 6.]]), None), - (array_ops.constant([[7., 8.], [9., 10.]]), None)] - opt = optimizer.KfacOptimizer(0.1, 0.2, 0.3, dummy_layer_collection()) - sq_norm = opt._squared_fisher_norm(grads_and_vars, pgrads_and_vars) - self.assertAlmostEqual(174., sess.run(sq_norm), places=5) - - def 
testUpdateClipCoeff(self): - with ops.Graph().as_default(), self.cached_session() as sess: - grads_and_vars = [(array_ops.constant([[1., 2.], [3., 4.]]), None), - (array_ops.constant([[2., 3.], [4., 5.]]), None)] - pgrads_and_vars = [(array_ops.constant([[3., 4.], [5., 6.]]), None), - (array_ops.constant([[7., 8.], [9., 10.]]), None)] - lrate = 0.1 - - # Note: without rescaling, the squared Fisher norm of the update - # is 1.74 - - # If the update already satisfies the norm constraint, there should - # be no rescaling. - opt = optimizer.KfacOptimizer( - lrate, 0.2, 0.3, dummy_layer_collection(), norm_constraint=10.) - coeff = opt._update_clip_coeff(grads_and_vars, pgrads_and_vars) - self.assertAlmostEqual(1., sess.run(coeff), places=5) - - # If the update violates the constraint, it should be rescaled to - # be on the constraint boundary. - opt = optimizer.KfacOptimizer( - lrate, 0.2, 0.3, dummy_layer_collection(), norm_constraint=0.5) - coeff = opt._update_clip_coeff(grads_and_vars, pgrads_and_vars) - sq_norm_pgrad = opt._squared_fisher_norm(grads_and_vars, pgrads_and_vars) - sq_norm_update = lrate**2 * coeff**2 * sq_norm_pgrad - self.assertAlmostEqual(0.5, sess.run(sq_norm_update), places=5) - - def testComputeUpdateStepsRegular(self): - # TODO(olganw): implement this. - pass - - def testComputeUpdateStepsAdam(self): - # TODO(olganw): implement this. 
- pass - - def testUpdateVelocities(self): - with ops.Graph().as_default(), self.cached_session() as sess: - layers = lc.LayerCollection() - layers.register_categorical_predictive_distribution( - array_ops.constant([1.0])) - opt = optimizer.KfacOptimizer( - 0.1, 0.2, 0.3, layers, momentum=0.5, momentum_type='regular') - x = variable_scope.get_variable('x', initializer=array_ops.ones((2, 2))) - y = variable_scope.get_variable( - 'y', initializer=array_ops.ones((2, 2)) * 2) - vec1 = array_ops.ones((2, 2)) * 3 - vec2 = array_ops.ones((2, 2)) * 4 - - model_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - update_op = opt._update_velocities([(vec1, x), (vec2, y)], 0.5) - opt_vars = [ - v for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - if v not in model_vars - ] - - sess.run(tf_variables.global_variables_initializer()) - old_opt_vars = sess.run(opt_vars) - - # Optimizer vars start out at 0. - for opt_var in old_opt_vars: - self.assertAllEqual(sess.run(array_ops.zeros_like(opt_var)), opt_var) - - sess.run(update_op) - new_opt_vars = sess.run(opt_vars) - # After one update, the velocities are equal to the vectors. - for vec, opt_var in zip([vec1, vec2], new_opt_vars): - self.assertAllEqual(sess.run(vec), opt_var) - - sess.run(update_op) - final_opt_vars = sess.run(opt_vars) - for first, second in zip(new_opt_vars, final_opt_vars): - self.assertFalse(np.equal(first, second).all()) - - def testApplyGradients(self): - with ops.Graph().as_default(), self.cached_session() as sess: - layer_collection = lc.LayerCollection() - - inputs = array_ops.ones((2, 1)) * 2 - weights_val = np.ones((1, 1), dtype=np.float32) * 3. 
- weights = variable_scope.get_variable( - 'w', initializer=array_ops.constant(weights_val)) - bias = variable_scope.get_variable( - 'b', initializer=init_ops.zeros_initializer(), shape=(1, 1)) - output = math_ops.matmul(inputs, weights) + bias - - layer_collection.register_fully_connected((weights, bias), inputs, output) - - logits = math_ops.tanh(output) - targets = array_ops.constant([[0.], [1.]]) - output = math_ops.reduce_mean( - nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets)) - - layer_collection.register_categorical_predictive_distribution(logits) - - opt = optimizer.KfacOptimizer( - 0.1, - 0.2, - 0.3, - layer_collection, - momentum=0.5, - momentum_type='regular') - (cov_update_thunks, - inv_update_thunks) = opt.make_vars_and_create_op_thunks() - cov_update_ops = tuple(thunk() for thunk in cov_update_thunks) - inv_update_ops = tuple(thunk() for thunk in inv_update_thunks) - - grads_and_vars = opt.compute_gradients(output, [weights, bias]) - all_vars = [grad_and_var[1] for grad_and_var in grads_and_vars] - - op = opt.apply_gradients(grads_and_vars) - - sess.run(tf_variables.global_variables_initializer()) - old_vars = sess.run(all_vars) - sess.run(cov_update_ops) - sess.run(inv_update_ops) - sess.run(op) - new_vars = sess.run(all_vars) - - for old_var, new_var in zip(old_vars, new_vars): - self.assertNotEqual(old_var, new_var) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py b/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py deleted file mode 100644 index 7df79a3c7f..0000000000 --- a/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py +++ /dev/null @@ -1,410 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tf.contrib.kfac.utils.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import numpy.random as npr - -from tensorflow.contrib.kfac.python.ops import utils -from tensorflow.contrib.tpu.python.tpu import tpu_function -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.platform import test - - -class SequenceDictTest(test.TestCase): - - def testSequenceDictInit(self): - seq_dict = utils.SequenceDict() - self.assertFalse(seq_dict._dict) - - def testSequenceDictInitWithIterable(self): - reg_dict = {'a': 'foo', 'b': 'bar'} - itr = zip(reg_dict.keys(), reg_dict.values()) - seq_dict = utils.SequenceDict(itr) - self.assertEqual(reg_dict, seq_dict._dict) - - def testGetItemSingleKey(self): - seq_dict = utils.SequenceDict({'a': 'foo', 'b': 'bar'}) - self.assertEqual('foo', seq_dict['a']) - - def testGetItemMultipleKeys(self): - seq_dict = utils.SequenceDict({'a': 'foo', 'b': 'bar'}) - self.assertEqual(['foo', 'bar'], seq_dict[('a', 'b')]) - - def 
testSetItemSingleKey(self): - seq_dict = utils.SequenceDict() - seq_dict['a'] = 'foo' - self.assertEqual([('a', 'foo')], seq_dict.items()) - - def testSetItemMultipleKeys(self): - seq_dict = utils.SequenceDict() - keys = ('a', 'b', 'c') - values = ('foo', 'bar', 'baz') - seq_dict[keys] = values - self.assertItemsEqual(list(zip(keys, values)), seq_dict.items()) - - -class SubGraphTest(test.TestCase): - - def testBasicGraph(self): - a = array_ops.constant([[1., 2.], [3., 4.]]) - b = array_ops.constant([[5., 6.], [7., 8.]]) - c = a + b - d = a * b - sub_graph = utils.SubGraph((c,)) - self.assertTrue(sub_graph.is_member(a)) - self.assertTrue(sub_graph.is_member(b)) - self.assertTrue(sub_graph.is_member(c)) - self.assertFalse(sub_graph.is_member(d)) - - def testRepeatedAdds(self): - a = array_ops.constant([[1., 2.], [3., 4.]]) - b = array_ops.constant([[5., 6.], [7., 8.]]) - c = a + b + a # note that a appears twice in this graph - sub_graph = utils.SubGraph((c,)) - self.assertTrue(sub_graph.is_member(a)) - self.assertTrue(sub_graph.is_member(b)) - self.assertTrue(sub_graph.is_member(c)) - - def testFilterList(self): - a = array_ops.constant([[1., 2.], [3., 4.]]) - b = array_ops.constant([[5., 6.], [7., 8.]]) - c = a + b - d = a * b - sub_graph = utils.SubGraph((c,)) - input_list = [b, d] - filtered_list = sub_graph.filter_list(input_list) - self.assertEqual(filtered_list, [b]) - - def testVariableUses(self): - with ops.Graph().as_default(): - var = variable_scope.get_variable('var', shape=[10, 10]) - resource_var = variable_scope.get_variable( - 'resource_var', shape=[10, 10], use_resource=True) - x = array_ops.zeros([3, 10]) - z0 = math_ops.matmul(x, var) + math_ops.matmul(x, var) - z1 = math_ops.matmul(x, resource_var) - sub_graph = utils.SubGraph((z0, z1)) - self.assertEqual(2, sub_graph.variable_uses(var)) - self.assertEqual(1, sub_graph.variable_uses(resource_var)) - - -class UtilsTest(test.TestCase): - - def _fully_connected_layer_params(self): - weights_part = 
array_ops.constant([[1., 2.], [4., 3.]]) - bias_part = array_ops.constant([1., 2.]) - return (weights_part, bias_part) - - def _conv_layer_params(self): - weights_shape = 2, 2, 3, 4 - biases_shape = weights_shape[-1:] - weights = array_ops.constant(npr.RandomState(0).randn(*weights_shape)) - biases = array_ops.constant(npr.RandomState(1).randn(*biases_shape)) - return (weights, biases) - - def testFullyConnectedLayerParamsTupleToMat2d(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - layer_params = self._fully_connected_layer_params() - output = utils.layer_params_to_mat2d(layer_params) - self.assertListEqual([3, 2], output.get_shape().as_list()) - self.assertAllClose( - sess.run(output), np.array([[1., 2.], [4., 3.], [1., 2.]])) - - def testFullyConnectedLayerParamsTensorToMat2d(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - layer_params = self._fully_connected_layer_params() - output = utils.layer_params_to_mat2d(layer_params[0]) - self.assertListEqual([2, 2], output.get_shape().as_list()) - self.assertAllClose(sess.run(output), np.array([[1., 2.], [4., 3.]])) - - def testConvLayerParamsTupleToMat2d(self): - with ops.Graph().as_default(): - random_seed.set_random_seed(200) - layer_params = self._conv_layer_params() - output = utils.layer_params_to_mat2d(layer_params) - self.assertListEqual([2 * 2 * 3 + 1, 4], output.get_shape().as_list()) - - def testKron(self): - with ops.Graph().as_default(), self.cached_session() as sess: - mat1 = np.array([[1., 2.], [3., 4.]]) - mat2 = np.array([[5., 6.], [7., 8.]]) - mat1_tf = array_ops.constant(mat1) - mat2_tf = array_ops.constant(mat2) - ans_tf = sess.run(utils.kronecker_product(mat1_tf, mat2_tf)) - ans_np = np.kron(mat1, mat2) - self.assertAllClose(ans_tf, ans_np) - - def testMat2dToFullyConnectedLayerParamsTuple(self): - with ops.Graph().as_default(), self.cached_session() as sess: - 
random_seed.set_random_seed(200) - vector_template = self._fully_connected_layer_params() - mat2d = array_ops.constant([[5., 4.], [3., 2.], [1., 0.]]) - - output = sess.run(utils.mat2d_to_layer_params(vector_template, mat2d)) - - self.assertIsInstance(output, tuple) - self.assertEqual(len(output), 2) - a, b = output - self.assertAllClose(a, np.array([[5., 4.], [3., 2.]])) - self.assertAllClose(b, np.array([1., 0.])) - - def testMat2dToFullyConnectedLayerParamsTensor(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - vector_template = self._fully_connected_layer_params()[0] - mat2d = array_ops.constant([[5., 4.], [3., 2.]]) - - output = sess.run(utils.mat2d_to_layer_params(vector_template, mat2d)) - - self.assertAllClose(output, np.array([[5., 4.], [3., 2.]])) - - def testTensorsToColumn(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - - vector = array_ops.constant(np.array([[0., 1.], [2., 3.]])) - output = utils.tensors_to_column(vector) - self.assertListEqual([4, 1], output.get_shape().as_list()) - self.assertAllClose(sess.run(output), np.array([0., 1., 2., 3.])[:, None]) - - vector = self._fully_connected_layer_params() - output = utils.tensors_to_column(vector) - self.assertListEqual([6, 1], output.get_shape().as_list()) - self.assertAllClose( - sess.run(output), np.array([1., 2., 4., 3., 1., 2.])[:, None]) - - vector = list(vector) - vector.append(array_ops.constant([[6.], [7.], [8.], [9.]])) - - output = utils.tensors_to_column(vector) - self.assertListEqual([10, 1], output.get_shape().as_list()) - self.assertAllClose( - sess.run(output), - np.array([1., 2., 4., 3., 1., 2., 6., 7., 8., 9.])[:, None]) - - def testColumnToTensors(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - - vector_template = array_ops.constant(np.array([[0., 1.], [2., 3.]])) - colvec = 
array_ops.constant(np.arange(4.)[:, None]) - output = sess.run(utils.column_to_tensors(vector_template, colvec)) - self.assertAllClose(output, np.array([[0., 1.], [2., 3.]])) - - vector_template = self._fully_connected_layer_params() - colvec = array_ops.constant(np.arange(6.)[:, None]) - output = sess.run(utils.column_to_tensors(vector_template, colvec)) - - self.assertIsInstance(output, tuple) - self.assertEqual(len(output), 2) - a, b = output - self.assertAllClose(a, np.array([[0., 1.], [2., 3.]])) - self.assertAllClose(b, np.array([4., 5.])) - - vector_template = list(vector_template) - vector_template.append(array_ops.constant([[6.], [7.], [8.], [9.]])) - colvec = array_ops.constant(np.arange(10.)[:, None]) - output = sess.run(utils.column_to_tensors(vector_template, colvec)) - self.assertIsInstance(output, tuple) - self.assertEqual(len(output), 3) - a, b, c = output - self.assertAllClose(a, np.array([[0., 1.], [2., 3.]])) - self.assertAllClose(b, np.array([4., 5.])) - self.assertAllClose(c, np.array([[6.], [7.], [8.], [9.]])) - - def testPosDefInvCholesky(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - npr.seed(0) - square = lambda x: np.dot(x, x.T) - - size = 3 - x = square(npr.randn(size, size)) - damp = 0.1 - identity = linalg_ops.eye(size, dtype=dtypes.float64) - - tf_inv = utils.posdef_inv_cholesky(array_ops.constant(x), identity, damp) - np_inv = np.linalg.inv(x + damp * np.eye(size)) - self.assertAllClose(sess.run(tf_inv), np_inv) - - def testPosDefInvMatrixInverse(self): - with ops.Graph().as_default(), self.cached_session() as sess: - random_seed.set_random_seed(200) - npr.seed(0) - square = lambda x: np.dot(x, x.T) - - size = 3 - x = square(npr.randn(size, size)) - damp = 0.1 - identity = linalg_ops.eye(size, dtype=dtypes.float64) - - tf_inv = utils.posdef_inv_matrix_inverse( - array_ops.constant(x), identity, damp) - np_inv = np.linalg.inv(x + damp * np.eye(size)) - 
self.assertAllClose(sess.run(tf_inv), np_inv) - - def testCrossReplicaMean(self): - """Ensures that cross_replica_mean() executes only when num_shards > 1.""" - with ops.Graph().as_default(): - with tpu_function.tpu_shard_context(4): - tensor = array_ops.zeros([], dtype=dtypes.float32) - mean = utils.cross_replica_mean(tensor) - self.assertNotEqual(mean, tensor) - - with ops.Graph().as_default(): - with tpu_function.tpu_shard_context(1): - tensor = array_ops.zeros([], dtype=dtypes.float32) - mean = utils.cross_replica_mean(tensor) - self.assertEqual(mean, tensor) - - with ops.Graph().as_default(): - with self.assertRaises(ValueError): # Outside of TPU context. - tensor = array_ops.zeros([], dtype=dtypes.float32) - mean = utils.cross_replica_mean(tensor) - - def testBatchExecute(self): - """Ensure batch_execute runs in a round-robin fashion.""" - - def increment_var(var): - return lambda: var.assign_add(1) - - with ops.Graph().as_default(), self.cached_session() as sess: - i = variable_scope.get_variable('i', initializer=0) - accumulators = [ - variable_scope.get_variable('var%d' % j, initializer=0) - for j in range(3) - ] - thunks = [increment_var(var) for var in accumulators] - increment_accumulators = utils.batch_execute(i, thunks, 2) - increment_i = i.assign_add(1) - - sess.run(variables.global_variables_initializer()) - - # Ensure one op per thunk. - self.assertEqual(3, len(increment_accumulators)) - - # Ensure round-robin execution. 
- values = [] - for _ in range(5): - sess.run(increment_accumulators) - sess.run(increment_i) - values.append(sess.run(accumulators)) - self.assertAllClose( - [ - [1, 1, 0], # - [2, 1, 1], # - [2, 2, 2], # - [3, 3, 2], # - [4, 3, 3] - ], - values) - - def testExtractConvolutionPatches(self): - with ops.Graph().as_default(), self.cached_session() as sess: - batch_size = 10 - image_spatial_shape = [9, 10, 11] - in_channels = out_channels = 32 - kernel_spatial_shape = [5, 3, 3] - spatial_strides = [1, 2, 1] - spatial_dilation = [1, 1, 1] - padding = 'SAME' - - images = random_ops.random_uniform( - [batch_size] + image_spatial_shape + [in_channels], seed=0) - kernel_shape = kernel_spatial_shape + [in_channels, out_channels] - kernel = random_ops.random_uniform(kernel_shape, seed=1) - - # Ensure shape matches expectation. - patches = utils.extract_convolution_patches( - images, - kernel_shape, - padding, - strides=spatial_strides, - dilation_rate=spatial_dilation) - result_spatial_shape = ( - patches.shape.as_list()[1:1 + len(image_spatial_shape)]) - self.assertEqual(patches.shape.as_list(), - [batch_size] + result_spatial_shape + - kernel_spatial_shape + [in_channels]) - - # Ensure extract...patches() + matmul() and convolution() implementation - # give the same answer. 
- outputs = nn_ops.convolution( - images, - kernel, - padding, - strides=spatial_strides, - dilation_rate=spatial_dilation) - - patches_flat = array_ops.reshape( - patches, [-1, np.prod(kernel_spatial_shape) * in_channels]) - kernel_flat = array_ops.reshape(kernel, [-1, out_channels]) - outputs_flat = math_ops.matmul(patches_flat, kernel_flat) - - outputs_, outputs_flat_ = sess.run([outputs, outputs_flat]) - self.assertAllClose(outputs_.flatten(), outputs_flat_.flatten()) - - def testExtractPointwiseConv2dPatches(self): - with ops.Graph().as_default(), self.cached_session() as sess: - batch_size = 10 - image_height = image_width = 8 - in_channels = out_channels = 3 - kernel_height = kernel_width = 1 - strides = [1, 1, 1, 1] - padding = 'VALID' - - images = random_ops.random_uniform( - [batch_size, image_height, image_width, in_channels], seed=0) - kernel_shape = [kernel_height, kernel_width, in_channels, out_channels] - kernel = random_ops.random_uniform(kernel_shape, seed=1) - - # Ensure shape matches expectation. - patches = utils.extract_pointwise_conv2d_patches(images, kernel_shape) - self.assertEqual(patches.shape.as_list(), [ - batch_size, image_height, image_width, kernel_height, kernel_width, - in_channels - ]) - - # Ensure extract...patches() + matmul() and conv2d() implementation - # give the same answer. 
- outputs = nn_ops.conv2d(images, kernel, strides, padding) - - patches_flat = array_ops.reshape( - patches, [-1, kernel_height * kernel_width * in_channels]) - kernel_flat = array_ops.reshape(kernel, [-1, out_channels]) - outputs_flat = math_ops.matmul(patches_flat, kernel_flat) - - outputs_, outputs_flat_ = sess.run([outputs, outputs_flat]) - self.assertAllClose(outputs_.flatten(), outputs_flat_.flatten()) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/contrib/kfac/python/ops/BUILD b/tensorflow/contrib/kfac/python/ops/BUILD deleted file mode 100644 index 3c01eb65e7..0000000000 --- a/tensorflow/contrib/kfac/python/ops/BUILD +++ /dev/null @@ -1,263 +0,0 @@ -package(default_visibility = [ - "//tensorflow/contrib/kfac:__pkg__", - "//tensorflow/contrib/kfac/python/kernel_tests:__pkg__", -]) - -licenses(["notice"]) # Apache 2.0 - -exports_files(["LICENSE"]) - -py_library( - name = "fisher_blocks", - srcs = ["fisher_blocks.py"], - srcs_version = "PY2AND3", - deps = [ - ":fisher_factors", - ":utils", - "//tensorflow/python:array_ops", - "//tensorflow/python:math_ops", - "@six_archive//:six", - ], -) - -py_library( - name = "fisher_blocks_lib", - srcs = ["fisher_blocks_lib.py"], - srcs_version = "PY2AND3", - deps = [ - ":fisher_blocks", - "//tensorflow/python:util", - ], -) - -py_library( - name = "fisher_factors", - srcs = ["fisher_factors.py"], - srcs_version = "PY2AND3", - deps = [ - ":linear_operator", - ":utils", - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:init_ops", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:random_ops", - "//tensorflow/python:special_math_ops", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - -py_library( - name = 
"fisher_factors_lib", - srcs = ["fisher_factors_lib.py"], - srcs_version = "PY2AND3", - deps = [ - ":fisher_factors", - "//tensorflow/python:util", - ], -) - -py_library( - name = "linear_operator", - srcs = ["linear_operator.py"], - srcs_version = "PY2AND3", - deps = [ - ":utils", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python/ops/linalg", - "@six_archive//:six", - ], -) - -py_library( - name = "loss_functions", - srcs = ["loss_functions.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/distributions:distributions_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:tensor_shape", - "//tensorflow/python/ops/distributions", - "@six_archive//:six", - ], -) - -py_library( - name = "loss_functions_lib", - srcs = ["loss_functions_lib.py"], - srcs_version = "PY2AND3", - deps = [ - ":loss_functions", - "//tensorflow/python:util", - ], -) - -py_library( - name = "curvature_matrix_vector_products", - srcs = ["curvature_matrix_vector_products.py"], - srcs_version = "PY2AND3", - deps = [ - ":utils", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:util", - ], -) - -py_library( - name = "curvature_matrix_vector_products_lib", - srcs = ["curvature_matrix_vector_products_lib.py"], - srcs_version = "PY2AND3", - deps = [ - ":curvature_matrix_vector_products", - "//tensorflow/python:util", - ], -) - -py_library( - name = "layer_collection", - srcs = ["layer_collection.py"], - srcs_version = "PY2AND3", - deps = [ - ":fisher_blocks", - ":loss_functions", - ":utils", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform", - "//tensorflow/python:util", - "//tensorflow/python:variable_scope", - "@six_archive//:six", - ], -) - -py_library( - name = "layer_collection_lib", - srcs = ["layer_collection_lib.py"], - srcs_version = "PY2AND3", - deps = [ - ":layer_collection", - 
"//tensorflow/python:util", - ], -) - -py_library( - name = "kfac_optimizer", - srcs = [ - "optimizer.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":curvature_matrix_vector_products", - ":fisher_estimator", - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:state_ops", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - ], -) - -py_library( - name = "kfac_optimizer_lib", - srcs = [ - "optimizer_lib.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":kfac_optimizer", - "//tensorflow/python:util", - ], -) - -py_library( - name = "fisher_estimator", - srcs = [ - "estimator.py", - "placement.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":utils", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:framework_ops", - "//tensorflow/python:gradients", - "//tensorflow/python:util", - "//third_party/py/numpy", - "@six_archive//:six", - ], -) - -py_library( - name = "fisher_estimator_lib", - srcs = [ - "estimator_lib.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":fisher_estimator", - "//tensorflow/python:util", - ], -) - -py_library( - name = "utils", - srcs = ["utils.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/contrib/tpu", - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:gradients", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:random_ops", - "//third_party/py/numpy", - ], -) - -py_library( - name = "utils_lib", - srcs = ["utils_lib.py"], - srcs_version = "PY2AND3", - deps = [ - ":utils", - "//tensorflow/python:util", - ], -) - -py_library( - name = "op_queue", - srcs = ["op_queue.py"], - srcs_version = "PY2AND3", - 
deps = [ - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/python:framework_ops", - ], -) - -py_library( - name = "op_queue_lib", - srcs = ["op_queue_lib.py"], - srcs_version = "PY2AND3", - deps = [ - ":op_queue", - "//tensorflow/python:util", - ], -) diff --git a/tensorflow/contrib/kfac/python/ops/curvature_matrix_vector_products.py b/tensorflow/contrib/kfac/python/ops/curvature_matrix_vector_products.py deleted file mode 100644 index 21b5cde9b9..0000000000 --- a/tensorflow/contrib/kfac/python/ops/curvature_matrix_vector_products.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Curvature matrix-vector multiplication.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.kfac.python.ops import utils -from tensorflow.python.ops import gradients_impl -from tensorflow.python.ops import math_ops -from tensorflow.python.util import nest - - -class CurvatureMatrixVectorProductComputer(object): - """Class for computing matrix-vector products for Fishers, GGNs and Hessians. - - In other words we compute M*v where M is the matrix, v is the vector, and - * refers to standard matrix/vector multiplication (not element-wise - multiplication). 
- - The matrices are defined in terms of some differential quantity of the total - loss function with respect to a provided list of tensors ("wrt_tensors"). - For example, the Fisher associated with a log-prob loss w.r.t. the - parameters. - - The 'vecs' argument to each method are lists of tensors that must be the - size as the corresponding ones from "wrt_tensors". They represent - the vector being multiplied. - - "factors" of the matrix M are defined as matrices B such that B*B^T = M. - Methods that multiply by the factor B take a 'loss_inner_vecs' argument - instead of 'vecs', which must be a list of tensors with shapes given by the - corresponding XXX_inner_shapes property. - - Note that matrix-vector products are not normalized by the batch size, nor - are any damping terms added to the results. These things can be easily - applied externally, if desired. - - See for example: www.cs.utoronto.ca/~jmartens/docs/HF_book_chapter.pdf - and https://arxiv.org/abs/1412.1193 for more information about the - generalized Gauss-Newton, Fisher, etc., and how to compute matrix-vector - products. - """ - - def __init__(self, losses, wrt_tensors): - """Create a CurvatureMatrixVectorProductComputer object. - - Args: - losses: A list of LossFunction instances whose sum defines the total loss. - wrt_tensors: A list of Tensors to compute the differential quantities - (defining the matrices) with respect to. See class description for more - info. 
- """ - self._losses = losses - self._inputs_to_losses = list(loss.inputs for loss in losses) - self._inputs_to_losses_flat = nest.flatten(self._inputs_to_losses) - self._wrt_tensors = wrt_tensors - - @property - def _total_loss(self): - return math_ops.add_n(tuple(loss.evaluate() for loss in self._losses)) - - # Jacobian multiplication functions: - def _multiply_jacobian(self, vecs): - """Multiply vecs by the Jacobian of losses.""" - # We stop gradients at wrt_tensors to produce partial derivatives (which is - # what we want for Jacobians). - jacobian_vecs_flat = utils.fwd_gradients( - self._inputs_to_losses_flat, self._wrt_tensors, grad_xs=vecs, - stop_gradients=self._wrt_tensors) - return nest.pack_sequence_as(self._inputs_to_losses, jacobian_vecs_flat) - - def _multiply_jacobian_transpose(self, loss_vecs): - """Multiply vecs by the transpose Jacobian of losses.""" - loss_vecs_flat = nest.flatten(loss_vecs) - # We stop gradients at wrt_tensors to produce partial derivatives (which is - # what we want for Jacobians). 
- return gradients_impl.gradients( - self._inputs_to_losses_flat, self._wrt_tensors, grad_ys=loss_vecs_flat, - stop_gradients=self._wrt_tensors) - - # Losses Fisher/Hessian multiplication functions: - def _multiply_loss_fisher(self, loss_vecs): - """Multiply loss_vecs by Fisher of total loss.""" - return tuple( - loss.multiply_fisher(loss_vec) - for loss, loss_vec in zip(self._losses, loss_vecs)) - - def _multiply_loss_fisher_factor(self, loss_inner_vecs): - """Multiply loss_inner_vecs by factor of Fisher of total loss.""" - return tuple( - loss.multiply_fisher_factor(loss_vec) - for loss, loss_vec in zip(self._losses, loss_inner_vecs)) - - def _multiply_loss_fisher_factor_transpose(self, loss_vecs): - """Multiply loss_vecs by transpose factor of Fisher of total loss.""" - return tuple( - loss.multiply_fisher_factor_transpose(loss_vec) - for loss, loss_vec in zip(self._losses, loss_vecs)) - - def _multiply_loss_hessian(self, loss_vecs): - """Multiply loss_vecs by Hessian of total loss.""" - return tuple( - loss.multiply_hessian(loss_vec) - for loss, loss_vec in zip(self._losses, loss_vecs)) - - def _multiply_loss_hessian_factor(self, loss_inner_vecs): - """Multiply loss_inner_vecs by factor of Hessian of total loss.""" - return tuple( - loss.multiply_hessian_factor(loss_vec) - for loss, loss_vec in zip(self._losses, loss_inner_vecs)) - - def _multiply_loss_hessian_factor_transpose(self, loss_vecs): - """Multiply loss_vecs by transpose factor of Hessian of total loss.""" - return tuple( - loss.multiply_hessian_factor_transpose(loss_vec) - for loss, loss_vec in zip(self._losses, loss_vecs)) - - # Matrix-vector product functions: - def multiply_fisher(self, vecs): - """Multiply vecs by Fisher of total loss.""" - jacobian_vecs = self._multiply_jacobian(vecs) - loss_fisher_jacobian_vecs = self._multiply_loss_fisher(jacobian_vecs) - return self._multiply_jacobian_transpose(loss_fisher_jacobian_vecs) - - def multiply_fisher_factor_transpose(self, vecs): - """Multiply vecs 
by transpose of factor of Fisher of total loss.""" - jacobian_vecs = self._multiply_jacobian(vecs) - return self._multiply_loss_fisher_factor_transpose(jacobian_vecs) - - def multiply_fisher_factor(self, loss_inner_vecs): - """Multiply loss_inner_vecs by factor of Fisher of total loss.""" - fisher_factor_transpose_vecs = self._multiply_loss_fisher_factor_transpose( - loss_inner_vecs) - return self._multiply_jacobian_transpose(fisher_factor_transpose_vecs) - - def multiply_hessian(self, vecs): - """Multiply vecs by Hessian of total loss.""" - return gradients_impl.gradients( - gradients_impl.gradients(self._total_loss, self._wrt_tensors), - self._wrt_tensors, - grad_ys=vecs) - - def multiply_generalized_gauss_newton(self, vecs): - """Multiply vecs by generalized Gauss-Newton of total loss.""" - jacobian_vecs = self._multiply_jacobian(vecs) - loss_hessian_jacobian_vecs = self._multiply_loss_hessian(jacobian_vecs) - return self._multiply_jacobian_transpose(loss_hessian_jacobian_vecs) - - def multiply_generalized_gauss_newton_factor_transpose(self, vecs): - """Multiply vecs by transpose of factor of GGN of total loss.""" - jacobian_vecs = self._multiply_jacobian(vecs) - return self._multiply_loss_hessian_factor_transpose(jacobian_vecs) - - def multiply_generalized_gauss_newton_factor(self, loss_inner_vecs): - """Multiply loss_inner_vecs by factor of GGN of total loss.""" - hessian_factor_transpose_vecs = ( - self._multiply_loss_hessian_factor_transpose(loss_inner_vecs)) - return self._multiply_jacobian_transpose(hessian_factor_transpose_vecs) - - # Shape properties for multiply_XXX_factor methods: - @property - def fisher_factor_inner_shapes(self): - """Shapes required by multiply_fisher_factor.""" - return tuple(loss.fisher_factor_inner_shape for loss in self._losses) - - @property - def generalized_gauss_newton_factor_inner_shapes(self): - """Shapes required by multiply_generalized_gauss_newton_factor.""" - return tuple(loss.hessian_factor_inner_shape for loss in 
self._losses) diff --git a/tensorflow/contrib/kfac/python/ops/curvature_matrix_vector_products_lib.py b/tensorflow/contrib/kfac/python/ops/curvature_matrix_vector_products_lib.py deleted file mode 100644 index 6e8c6404dc..0000000000 --- a/tensorflow/contrib/kfac/python/ops/curvature_matrix_vector_products_lib.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Curvature matrix-vector multiplication.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.kfac.python.ops.curvature_matrix_vector_products import * -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import - -_allowed_symbols = [ - 'CurvatureMatrixVectorProductComputer', -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/estimator.py b/tensorflow/contrib/kfac/python/ops/estimator.py deleted file mode 100644 index 323234c403..0000000000 --- a/tensorflow/contrib/kfac/python/ops/estimator.py +++ /dev/null @@ -1,516 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Defines the high-level Fisher estimator class.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc -import numpy as np -import six - -from tensorflow.contrib.kfac.python.ops import placement -from tensorflow.contrib.kfac.python.ops import utils -from tensorflow.python.framework import ops as tf_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gradients_impl -from tensorflow.python.ops import variable_scope -from tensorflow.python.util import nest - - -# The linter is confused. -# pylint: disable=abstract-class-instantiated -def make_fisher_estimator(placement_strategy=None, **kwargs): - """Creates Fisher estimator instances based on the placement strategy. - - For example if the `placement_strategy` is 'round_robin' then - `FisherEstimatorRoundRobin` instance is returned. - - Args: - placement_strategy: `string`, Strategy to be used for placing covariance - variables, covariance ops and inverse ops. Check - `placement.FisherEstimatorRoundRobin` for a concrete example. - **kwargs: Arguments to be passed into `FisherEstimator` class initializer. - - Returns: - An instance of class which inherits from `FisherEstimator` and the mixin - which implements specific placement strategy. 
See, - `FisherEstimatorRoundRobin` which inherits from `FisherEstimator` and - `RoundRobinPlacementMixin`. - - Raises: - ValueError: If the `placement_strategy` is not equal to 'round_robin'. - """ - if placement_strategy in [None, "round_robin"]: - return FisherEstimatorRoundRobin(**kwargs) - else: - raise ValueError("Unimplemented vars and ops " - "placement strategy : {}".format(placement_strategy)) -# pylint: enable=abstract-class-instantiated - - -@six.add_metaclass(abc.ABCMeta) -class FisherEstimator(object): - """Fisher estimator class supporting various approximations of the Fisher. - - This is an abstract base class which does not implement a strategy for - placing covariance variables, covariance update ops and inverse update ops. - The placement strategies are implemented in `placement.py`. See - `FisherEstimatorRoundRobin` for example of a concrete subclass with - a round-robin placement strategy. - """ - - def __init__(self, - variables, - cov_ema_decay, - damping, - layer_collection, - exps=(-1,), - estimation_mode="gradients", - colocate_gradients_with_ops=True, - name="FisherEstimator", - compute_cholesky=False, - compute_cholesky_inverse=False): - """Create a FisherEstimator object. - - Args: - variables: A `list` of variables or `callable` which returns the variables - for which to estimate the Fisher. This must match the variables - registered in layer_collection (if it is not None). - cov_ema_decay: The decay factor used when calculating the covariance - estimate moving averages. - damping: float. The damping factor used to stabilize training due to - errors in the local approximation with the Fisher information matrix, - and to regularize the update direction by making it closer to the - gradient. (Higher damping means the update looks more like a standard - gradient update - see Tikhonov regularization.) - layer_collection: The layer collection object, which holds the Fisher - blocks, Kronecker factors, and losses associated with the - graph. 
- exps: List of floats or ints. These represent the different matrix - powers of the approximate Fisher that the FisherEstimator will be able - to multiply vectors by. If the user asks for a matrix power other - one of these (or 1, which is always supported), there will be a - failure. (Default: (-1,)) - estimation_mode: The type of estimator to use for the Fishers. Can be - 'gradients', 'empirical', 'curvature_prop', or 'exact'. - (Default: 'gradients'). 'gradients' is the basic estimation approach - from the original K-FAC paper. 'empirical' computes the 'empirical' - Fisher information matrix (which uses the data's distribution for the - targets, as opposed to the true Fisher which uses the model's - distribution) and requires that each registered loss have specified - targets. 'curvature_propagation' is a method which estimates the - Fisher using self-products of random 1/-1 vectors times "half-factors" - of the Fisher, as described here: https://arxiv.org/abs/1206.6464 . - Finally, 'exact' is the obvious generalization of Curvature - Propagation to compute the exact Fisher (modulo any additional - diagonal or Kronecker approximations) by looping over one-hot vectors - for each coordinate of the output instead of using 1/-1 vectors. It - is more expensive to compute than the other three options by a factor - equal to the output dimension, roughly speaking. - colocate_gradients_with_ops: Whether we should request gradients be - colocated with their respective ops. (Default: True) - name: A string. A name given to this estimator, which is added to the - variable scope when constructing variables and ops. - (Default: "FisherEstimator") - compute_cholesky: Bool. Whether or not the FisherEstimator will be - able to multiply vectors by the Cholesky factor. - (Default: False) - compute_cholesky_inverse: Bool. Whether or not the FisherEstimator - will be able to multiply vectors by the Cholesky factor inverse. 
- (Default: False) - Raises: - ValueError: If no losses have been registered with layer_collection. - """ - self._variables = variables - self._cov_ema_decay = cov_ema_decay - self._damping = damping - self._estimation_mode = estimation_mode - self._layers = layer_collection - self._gradient_fns = { - "gradients": self._get_grads_lists_gradients, - "empirical": self._get_grads_lists_empirical, - "curvature_prop": self._get_grads_lists_curvature_prop, - "exact": self._get_grads_lists_exact - } - self._colocate_gradients_with_ops = colocate_gradients_with_ops - - self._made_vars = False - self._exps = exps - self._compute_cholesky = compute_cholesky - self._compute_cholesky_inverse = compute_cholesky_inverse - - self._name = name - - @property - def variables(self): - if callable(self._variables): - return self._variables() - else: - return self._variables - - @property - def damping(self): - return self._damping - - @property - def blocks(self): - """All registered FisherBlocks.""" - return self._layers.get_blocks() - - @property - def factors(self): - """All registered FisherFactors.""" - return self._layers.get_factors() - - @property - def name(self): - return self._name - - @abc.abstractmethod - def make_vars_and_create_op_thunks(self, scope=None): - """Make vars and create op thunks with a specific placement strategy. - - For each factor, all of that factor's cov variables and their associated - update ops will be placed on a particular device. A new device is chosen - for each factor by cycling through list of devices in the cov_devices - argument. If cov_devices is None then no explicit device placement occurs. - - An analogous strategy is followed for inverse update ops, with the list of - devices being given by the inv_devices argument. - - Inverse variables on the other hand are not placed on any specific device - (they will just use the current the device placement context, whatever - that happens to be). 
The idea is that the inverse variable belong where - they will be accessed most often, which is the device that actually applies - the preconditioner to the gradient. The user will be responsible for setting - the device context for this. - - Args: - scope: A string or None. If None it will be set to the name of this - estimator (given by the name property). All variables will be created, - and all thunks will execute, inside of a variable scope of the given - name. (Default: None) - - Returns: - cov_update_thunks: List of cov update thunks. Corresponds one-to-one with - the list of factors given by the "factors" property. - inv_update_thunks: List of inv update thunks. Corresponds one-to-one with - the list of factors given by the "factors" property. - """ - pass - - def _apply_transformation(self, vecs_and_vars, transform): - """Applies an block-wise transformation to the corresponding vectors. - - Args: - vecs_and_vars: List of (vector, variable) pairs. - transform: A function of the form f(fb, vec), where vec is the vector - to transform and fb is its corresponding block in the matrix, that - returns the transformed vector. - - Returns: - A list of (transformed vector, var) pairs in the same order as - vecs_and_vars. - """ - - vecs = utils.SequenceDict((var, vec) for vec, var in vecs_and_vars) - - trans_vecs = utils.SequenceDict() - - for params, fb in self._layers.fisher_blocks.items(): - trans_vecs[params] = transform(fb, vecs[params]) - - return [(trans_vecs[var], var) for _, var in vecs_and_vars] - - def multiply_inverse(self, vecs_and_vars): - """Multiplies the vecs by the corresponding (damped) inverses of the blocks. - - Args: - vecs_and_vars: List of (vector, variable) pairs. - - Returns: - A list of (transformed vector, var) pairs in the same order as - vecs_and_vars. - """ - return self.multiply_matpower(-1, vecs_and_vars) - - def multiply(self, vecs_and_vars): - """Multiplies the vectors by the corresponding (damped) blocks. 
- - Args: - vecs_and_vars: List of (vector, variable) pairs. - - Returns: - A list of (transformed vector, var) pairs in the same order as - vecs_and_vars. - """ - return self.multiply_matpower(1, vecs_and_vars) - - def multiply_matpower(self, exp, vecs_and_vars): - """Multiplies the vecs by the corresponding matrix powers of the blocks. - - Args: - exp: A float representing the power to raise the blocks by before - multiplying it by the vector. - vecs_and_vars: List of (vector, variable) pairs. - - Returns: - A list of (transformed vector, var) pairs in the same order as - vecs_and_vars. - """ - assert exp in self._exps - - fcn = lambda fb, vec: fb.multiply_matpower(vec, exp) - return self._apply_transformation(vecs_and_vars, fcn) - - def multiply_cholesky(self, vecs_and_vars, transpose=False): - """Multiplies the vecs by the corresponding Cholesky factors. - - Args: - vecs_and_vars: List of (vector, variable) pairs. - transpose: Bool. If true the Cholesky factors are transposed before - multiplying the vecs. (Default: False) - - Returns: - A list of (transformed vector, var) pairs in the same order as - vecs_and_vars. - """ - assert self._compute_cholesky - - fcn = lambda fb, vec: fb.multiply_cholesky(vec, transpose=transpose) - return self._apply_transformation(vecs_and_vars, fcn) - - def multiply_cholesky_inverse(self, vecs_and_vars, transpose=False): - """Mults the vecs by the inverses of the corresponding Cholesky factors. - - Note: if you are using Cholesky inverse multiplication to sample from - a matrix-variate Gaussian you will want to multiply by the transpose. - Let L be the Cholesky factor of F and observe that - - L^-T * L^-1 = (L * L^T)^-1 = F^-1 . - - Thus we want to multiply by L^-T in order to sample from Gaussian with - covariance F^-1. - - Args: - vecs_and_vars: List of (vector, variable) pairs. - transpose: Bool. If true the Cholesky factor inverses are transposed - before multiplying the vecs. 
(Default: False) - - Returns: - A list of (transformed vector, var) pairs in the same order as - vecs_and_vars. - """ - assert self._compute_cholesky_inverse - - fcn = lambda fb, vec: fb.multiply_cholesky_inverse(vec, transpose=transpose) - return self._apply_transformation(vecs_and_vars, fcn) - - def _instantiate_factors(self): - """Instantiates FisherFactors' variables. - - Raises: - ValueError: If estimation_mode was improperly specified at construction. - """ - blocks = self.blocks - tensors_to_compute_grads = [ - block.tensors_to_compute_grads() for block in blocks - ] - - try: - grads_lists = self._gradient_fns[self._estimation_mode]( - tensors_to_compute_grads) - except KeyError: - raise ValueError("Unrecognized value {} for estimation_mode.".format( - self._estimation_mode)) - - for grads_list, block in zip(grads_lists, blocks): - block.instantiate_factors(grads_list, self.damping) - - def _check_vars_unmade_and_set_made_flag(self): - if self._made_vars: - raise Exception("Already made variables.") - self._made_vars = True - - def made_vars(self): - return self._made_vars - - def _register_matrix_functions(self): - for block in self.blocks: - for exp in self._exps: - block.register_matpower(exp) - if self._compute_cholesky: - block.register_cholesky() - if self._compute_cholesky_inverse: - block.register_cholesky_inverse() - - def _finalize_layer_collection(self): - self._layers.create_subgraph() - self._layers.check_registration(self.variables) - self._instantiate_factors() - self._register_matrix_functions() - - def create_ops_and_vars_thunks(self, scope=None): - """Create thunks that make the ops and vars on demand. - - This function returns 4 lists of thunks: cov_variable_thunks, - cov_update_thunks, inv_variable_thunks, and inv_update_thunks. - - The length of each list is the number of factors and the i-th element of - each list corresponds to the i-th factor (given by the "factors" property). 
- - Note that the execution of these thunks must happen in a certain - partial order. The i-th element of cov_variable_thunks must execute - before the i-th element of cov_update_thunks (and also the i-th element - of inv_update_thunks). Similarly, the i-th element of inv_variable_thunks - must execute before the i-th element of inv_update_thunks. - - TL;DR (oversimplified): Execute the thunks according to the order that - they are returned. - - Args: - scope: A string or None. If None it will be set to the name of this - estimator (given by the name property). All thunks will execute inside - of a variable scope of the given name. (Default: None) - Returns: - cov_variable_thunks: A list of thunks that make the cov variables. - cov_update_thunks: A list of thunks that make the cov update ops. - inv_variable_thunks: A list of thunks that make the inv variables. - inv_update_thunks: A list of thunks that make the inv update ops. - """ - self._check_vars_unmade_and_set_made_flag() - - self._finalize_layer_collection() - - scope = self.name if scope is None else scope - - cov_variable_thunks = [ - self._create_cov_variable_thunk(factor, scope) - for factor in self.factors - ] - cov_update_thunks = [ - self._create_cov_update_thunk(factor, scope) for factor in self.factors - ] - inv_variable_thunks = [ - self._create_inv_variable_thunk(factor, scope) - for factor in self.factors - ] - inv_update_thunks = [ - self._create_inv_update_thunk(factor, scope) for factor in self.factors - ] - - return (cov_variable_thunks, cov_update_thunks, - inv_variable_thunks, inv_update_thunks) - - def _create_cov_variable_thunk(self, factor, scope): - """Constructs a covariance variable thunk for a single FisherFactor.""" - - def thunk(): - with variable_scope.variable_scope(scope): - return factor.instantiate_cov_variables() - - return thunk - - def _create_cov_update_thunk(self, factor, scope): - """Constructs a covariance update thunk for a single FisherFactor.""" - - def thunk(): - 
with variable_scope.variable_scope(scope): - return factor.make_covariance_update_op(self._cov_ema_decay) - - return thunk - - def _create_inv_variable_thunk(self, factor, scope): - """Constructs a inverse variable thunk for a single FisherFactor.""" - - def thunk(): - with variable_scope.variable_scope(scope): - return factor.instantiate_inv_variables() - - return thunk - - def _create_inv_update_thunk(self, factor, scope): - """Constructs an inverse update thunk for a single FisherFactor.""" - - def thunk(): - with variable_scope.variable_scope(scope): - return control_flow_ops.group(factor.make_inverse_update_ops()) - - return thunk - - def _get_grads_lists_gradients(self, tensors): - # Passing in a list of loss values is better than passing in the sum as - # the latter creates unnessesary ops on the default device - grads_flat = gradients_impl.gradients( - self._layers.eval_losses_on_samples(), - nest.flatten(tensors), - colocate_gradients_with_ops=self._colocate_gradients_with_ops) - grads_all = nest.pack_sequence_as(tensors, grads_flat) - return tuple((grad,) for grad in grads_all) - - def _get_grads_lists_empirical(self, tensors): - # Passing in a list of loss values is better than passing in the sum as - # the latter creates unnecessary ops on the default device - grads_flat = gradients_impl.gradients( - self._layers.eval_losses(), - nest.flatten(tensors), - colocate_gradients_with_ops=self._colocate_gradients_with_ops) - grads_all = nest.pack_sequence_as(tensors, grads_flat) - return tuple((grad,) for grad in grads_all) - - def _get_transformed_random_signs(self): - transformed_random_signs = [] - for loss in self._layers.losses: - with tf_ops.colocate_with(self._layers.loss_colocation_ops[loss]): - transformed_random_signs.append( - loss.multiply_fisher_factor( - utils.generate_random_signs(loss.fisher_factor_inner_shape))) - return transformed_random_signs - - def _get_grads_lists_curvature_prop(self, tensors): - loss_inputs = list(loss.inputs for loss 
in self._layers.losses) - transformed_random_signs = self._get_transformed_random_signs() - grads_flat = gradients_impl.gradients( - nest.flatten(loss_inputs), - nest.flatten(tensors), - grad_ys=nest.flatten(transformed_random_signs), - colocate_gradients_with_ops=self._colocate_gradients_with_ops) - grads_all = nest.pack_sequence_as(tensors, grads_flat) - return tuple((grad,) for grad in grads_all) - - def _get_grads_lists_exact(self, tensors): - """No docstring required.""" - # Loop over all coordinates of all losses. - grads_all = [] - for loss in self._layers.losses: - with tf_ops.colocate_with(self._layers.loss_colocation_ops[loss]): - for index in np.ndindex(*loss.fisher_factor_inner_static_shape[1:]): - transformed_one_hot = loss.multiply_fisher_factor_replicated_one_hot( - index) - grads_flat = gradients_impl.gradients( - loss.inputs, - nest.flatten(tensors), - grad_ys=transformed_one_hot, - colocate_gradients_with_ops=self._colocate_gradients_with_ops) - grads_all.append(nest.pack_sequence_as(tensors, grads_flat)) - return zip(*grads_all) - - -class FisherEstimatorRoundRobin(placement.RoundRobinPlacementMixin, - FisherEstimator): - """Fisher estimator which provides round robin device placement strategy.""" - pass diff --git a/tensorflow/contrib/kfac/python/ops/estimator_lib.py b/tensorflow/contrib/kfac/python/ops/estimator_lib.py deleted file mode 100644 index 9c9fef471f..0000000000 --- a/tensorflow/contrib/kfac/python/ops/estimator_lib.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Defines the high-level Fisher estimator class.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.kfac.python.ops.estimator import * -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import - -_allowed_symbols = [ - 'FisherEstimator', - 'make_fisher_estimator', -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py deleted file mode 100644 index 9fa6eb7dcd..0000000000 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ /dev/null @@ -1,1752 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""FisherBlock definitions. - -This library contains classes for estimating blocks in a model's Fisher -Information matrix. Suppose one has a model that parameterizes a posterior -distribution over 'y' given 'x' with parameters 'params', p(y | x, params). Its -Fisher Information matrix is given by, - - $$F(params) = E[ v(x, y, params) v(x, y, params)^T ]$$ - -where, - - $$v(x, y, params) = (d / d params) log p(y | x, params)$$ - -and the expectation is taken with respect to the data's distribution for 'x' and -the model's posterior distribution for 'y', - - x ~ p(x) - y ~ p(y | x, params) - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc -import enum # pylint: disable=g-bad-import-order - -import numpy as np -import six - -from tensorflow.contrib.kfac.python.ops import fisher_factors -from tensorflow.contrib.kfac.python.ops import utils -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.util import nest - -# For blocks corresponding to convolutional layers, or any type of block where -# the parameters can be thought of as being replicated in time or space, -# we want to adjust the scale of the damping by -# damping /= num_replications ** NORMALIZE_DAMPING_POWER -NORMALIZE_DAMPING_POWER = 1.0 - -# Methods for adjusting damping for FisherBlocks. See -# compute_pi_adjusted_damping() for details. 
-PI_OFF_NAME = "off" -PI_TRACENORM_NAME = "tracenorm" -PI_TYPE = PI_TRACENORM_NAME - - -def set_global_constants(normalize_damping_power=None, pi_type=None): - """Sets various global constants used by the classes in this module.""" - global NORMALIZE_DAMPING_POWER - global PI_TYPE - - if normalize_damping_power is not None: - NORMALIZE_DAMPING_POWER = normalize_damping_power - - if pi_type is not None: - PI_TYPE = pi_type - - -def normalize_damping(damping, num_replications): - """Normalize damping after adjusting scale by NORMALIZE_DAMPING_POWER.""" - if NORMALIZE_DAMPING_POWER: - return damping / (num_replications ** NORMALIZE_DAMPING_POWER) - return damping - - -def compute_pi_tracenorm(left_cov, right_cov): - r"""Computes the scalar constant pi for Tikhonov regularization/damping. - - $$\pi = \sqrt{ (trace(A) / dim(A)) / (trace(B) / dim(B)) }$$ - See section 6.3 of https://arxiv.org/pdf/1503.05671.pdf for details. - - Args: - left_cov: A LinearOperator object. The left Kronecker factor "covariance". - right_cov: A LinearOperator object. The right Kronecker factor "covariance". - - Returns: - The computed scalar constant pi for these Kronecker Factors (as a Tensor). - """ - # Instead of dividing by the dim of the norm, we multiply by the dim of the - # other norm. This works out the same in the ratio. - left_norm = left_cov.trace() * int(right_cov.domain_dimension) - right_norm = right_cov.trace() * int(left_cov.domain_dimension) - return math_ops.sqrt(left_norm / right_norm) - - -def compute_pi_adjusted_damping(left_cov, right_cov, damping): - - if PI_TYPE == PI_TRACENORM_NAME: - pi = compute_pi_tracenorm(left_cov, right_cov) - return (damping * pi, damping / pi) - - elif PI_TYPE == PI_OFF_NAME: - return (damping, damping) - - -class PackagedFunc(object): - """A Python thunk with a stable ID. - - Enables stable names for lambdas. - """ - - def __init__(self, func, func_id): - """Initializes PackagedFunc. - - Args: - func: a zero-arg Python function. 
- func_id: a hashable, function that produces a hashable, or a list/tuple - thereof. - """ - self._func = func - func_id = func_id if isinstance(func_id, (tuple, list)) else (func_id,) - self._func_id = func_id - - def __call__(self): - return self._func() - - @property - def func_id(self): - """A hashable identifier for this function.""" - return tuple(elt() if callable(elt) else elt for elt in self._func_id) - - -def _package_func(func, func_id): - return PackagedFunc(func, func_id) - - -@six.add_metaclass(abc.ABCMeta) -class FisherBlock(object): - """Abstract base class for objects modeling approximate Fisher matrix blocks. - - Subclasses must implement register_matpower, multiply_matpower, - instantiate_factors, tensors_to_compute_grads, and num_registered_towers - methods. - """ - - def __init__(self, layer_collection): - self._layer_collection = layer_collection - - @abc.abstractmethod - def instantiate_factors(self, grads_list, damping): - """Creates and registers the component factors of this Fisher block. - - Args: - grads_list: A list gradients (each a Tensor or tuple of Tensors) with - respect to the tensors returned by tensors_to_compute_grads() that - are to be used to estimate the block. - damping: The damping factor (float or Tensor). - """ - pass - - @abc.abstractmethod - def register_matpower(self, exp): - """Registers a matrix power to be computed by the block. - - Args: - exp: A float representing the power to raise the block by. 
- """ - pass - - @abc.abstractmethod - def register_cholesky(self): - """Registers a Cholesky factor to be computed by the block.""" - pass - - @abc.abstractmethod - def register_cholesky_inverse(self): - """Registers an inverse Cholesky factor to be computed by the block.""" - pass - - def register_inverse(self): - """Registers a matrix inverse to be computed by the block.""" - self.register_matpower(-1) - - @abc.abstractmethod - def multiply_matpower(self, vector, exp): - """Multiplies the vector by the (damped) matrix-power of the block. - - Args: - vector: The vector (a Tensor or tuple of Tensors) to be multiplied. - exp: A float representing the power to raise the block by before - multiplying it by the vector. - - Returns: - The vector left-multiplied by the (damped) matrix-power of the block. - """ - pass - - def multiply_inverse(self, vector): - """Multiplies the vector by the (damped) inverse of the block. - - Args: - vector: The vector (a Tensor or tuple of Tensors) to be multiplied. - - Returns: - The vector left-multiplied by the (damped) inverse of the block. - """ - return self.multiply_matpower(vector, -1) - - def multiply(self, vector): - """Multiplies the vector by the (damped) block. - - Args: - vector: The vector (a Tensor or tuple of Tensors) to be multiplied. - - Returns: - The vector left-multiplied by the (damped) block. - """ - return self.multiply_matpower(vector, 1) - - @abc.abstractmethod - def multiply_cholesky(self, vector, transpose=False): - """Multiplies the vector by the (damped) Cholesky-factor of the block. - - Args: - vector: The vector (a Tensor or tuple of Tensors) to be multiplied. - transpose: Bool. If true the Cholesky factor is transposed before - multiplying the vector. (Default: False) - - Returns: - The vector left-multiplied by the (damped) Cholesky-factor of the block. 
- """ - pass - - @abc.abstractmethod - def multiply_cholesky_inverse(self, vector, transpose=False): - """Multiplies vector by the (damped) inverse Cholesky-factor of the block. - - Args: - vector: The vector (a Tensor or tuple of Tensors) to be multiplied. - transpose: Bool. If true the Cholesky factor inverse is transposed - before multiplying the vector. (Default: False) - Returns: - Vector left-multiplied by (damped) inverse Cholesky-factor of the block. - """ - pass - - @abc.abstractmethod - def tensors_to_compute_grads(self): - """Returns the Tensor(s) with respect to which this FisherBlock needs grads. - """ - pass - - @abc.abstractproperty - def num_registered_towers(self): - """Number of towers registered for this FisherBlock. - - Typically equal to the number of towers in a multi-tower setup. - """ - pass - - -class FullFB(FisherBlock): - """FisherBlock using a full matrix estimate (no approximations). - - FullFB uses a full matrix estimate (no approximations), and should only ever - be used for very low dimensional parameters. - - Note that this uses the naive "square the sum estimator", and so is applicable - to any type of parameter in principle, but has very high variance. - """ - - def __init__(self, layer_collection, params): - """Creates a FullFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - params: The parameters of this layer (Tensor or tuple of Tensors). 
- """ - self._batch_sizes = [] - self._params = params - - super(FullFB, self).__init__(layer_collection) - - def instantiate_factors(self, grads_list, damping): - self._damping_func = _package_func(lambda: damping, (damping,)) - - self._factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullFactor, (grads_list, self._batch_size)) - - def register_matpower(self, exp): - self._factor.register_matpower(exp, self._damping_func) - - def register_cholesky(self): - self._factor.register_cholesky(self._damping_func) - - def register_cholesky_inverse(self): - self._factor.register_cholesky_inverse(self._damping_func) - - def _multiply_matrix(self, matrix, vector, transpose=False): - vector_flat = utils.tensors_to_column(vector) - out_flat = matrix.matmul(vector_flat, adjoint=transpose) - return utils.column_to_tensors(vector, out_flat) - - def multiply_matpower(self, vector, exp): - matrix = self._factor.get_matpower(exp, self._damping_func) - return self._multiply_matrix(matrix, vector) - - def multiply_cholesky(self, vector, transpose=False): - matrix = self._factor.get_cholesky(self._damping_func) - return self._multiply_matrix(matrix, vector, transpose=transpose) - - def multiply_cholesky_inverse(self, vector, transpose=False): - matrix = self._factor.get_cholesky_inverse(self._damping_func) - return self._multiply_matrix(matrix, vector, transpose=transpose) - - def full_fisher_block(self): - """Explicitly constructs the full Fisher block.""" - return self._factor.get_cov_as_linear_operator().to_dense() - - def tensors_to_compute_grads(self): - return self._params - - def register_additional_tower(self, batch_size): - """Register an additional tower. - - Args: - batch_size: The batch size, used in the covariance estimator. 
- """ - self._batch_sizes.append(batch_size) - - @property - def num_registered_towers(self): - return len(self._batch_sizes) - - @property - def _batch_size(self): - return math_ops.reduce_sum(self._batch_sizes) - - -@six.add_metaclass(abc.ABCMeta) -class DiagonalFB(FisherBlock): - """A base class for FisherBlocks that use diagonal approximations.""" - - def register_matpower(self, exp): - # Not needed for this. Matrix powers are computed on demand in the - # diagonal case - pass - - def register_cholesky(self): - # Not needed for this. Cholesky's are computed on demand in the - # diagonal case - pass - - def register_cholesky_inverse(self): - # Not needed for this. Cholesky inverses's are computed on demand in the - # diagonal case - pass - - def _multiply_matrix(self, matrix, vector): - vector_flat = utils.tensors_to_column(vector) - out_flat = matrix.matmul(vector_flat) - return utils.column_to_tensors(vector, out_flat) - - def multiply_matpower(self, vector, exp): - matrix = self._factor.get_matpower(exp, self._damping_func) - return self._multiply_matrix(matrix, vector) - - def multiply_cholesky(self, vector, transpose=False): - matrix = self._factor.get_cholesky(self._damping_func) - return self._multiply_matrix(matrix, vector) - - def multiply_cholesky_inverse(self, vector, transpose=False): - matrix = self._factor.get_cholesky_inverse(self._damping_func) - return self._multiply_matrix(matrix, vector) - - def full_fisher_block(self): - return self._factor.get_cov_as_linear_operator().to_dense() - - -class NaiveDiagonalFB(DiagonalFB): - """FisherBlock using a diagonal matrix approximation. - - This type of approximation is generically applicable but quite primitive. - - Note that this uses the naive "square the sum estimator", and so is applicable - to any type of parameter in principle, but has very high variance. - """ - - def __init__(self, layer_collection, params): - """Creates a NaiveDiagonalFB block. 
- - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - params: The parameters of this layer (Tensor or tuple of Tensors). - """ - self._params = params - self._batch_sizes = [] - - super(NaiveDiagonalFB, self).__init__(layer_collection) - - def instantiate_factors(self, grads_list, damping): - self._damping_func = _package_func(lambda: damping, (damping,)) - - self._factor = self._layer_collection.make_or_get_factor( - fisher_factors.NaiveDiagonalFactor, (grads_list, self._batch_size)) - - def tensors_to_compute_grads(self): - return self._params - - def register_additional_tower(self, batch_size): - """Register an additional tower. - - Args: - batch_size: The batch size, used in the covariance estimator. - """ - self._batch_sizes.append(batch_size) - - @property - def num_registered_towers(self): - return len(self._batch_sizes) - - @property - def _batch_size(self): - return math_ops.reduce_sum(self._batch_sizes) - - -class InputOutputMultiTower(object): - """Mix-in class for blocks with inputs & outputs and multiple mini-batches.""" - - def __init__(self, *args, **kwargs): - self.__inputs = [] - self.__outputs = [] - super(InputOutputMultiTower, self).__init__(*args, **kwargs) - - def _process_data(self, grads_list): - """Process data into the format used by the factors. - - This function takes inputs and grads_lists data and processes it into - one of the formats expected by the FisherFactor classes (depending on - the value of the global configuration variable TOWER_STRATEGY). - - The initial format of self._inputs is expected to be a list of Tensors - over towers. Similarly grads_lists is expected to be a list over sources - of such lists. - - If TOWER_STRATEGY is "concat", 'inputs' becomes a tuple containing a single - tensor (represented as a PartitionedTensor object) equal to the - concatenation (across towers) of all of the elements of self._inputs. 
And - similarly grads_list is formatted into a tuple (over sources) of such - tensors (also represented as PartitionedTensors). - - If TOWER_STRATEGY is "separate", formatting of inputs and grads_list - remains unchanged from the initial format (although possibly converting - from lists into tuples). - - Args: - grads_list: grads_list in its initial format (see above). - - Returns: - inputs: self._inputs transformed into the appropriate format (see - above). - grads_list: grads_list transformed into the appropriate format (see - above). - - Raises: - ValueError: if TOWER_STRATEGY is not one of "separate" or "concat". - """ - inputs = self._inputs - # inputs is a list over towers of Tensors - # grads_list is a list of list with the first index being sources and the - # second being towers. - if fisher_factors.TOWER_STRATEGY == "concat": - # Merge towers together into a PartitionedTensor. We package it in - # a singleton tuple since the factors will expect a list over towers - inputs = (utils.PartitionedTensor(inputs),) - # Do the same for grads_list but preserve leading sources dimension - grads_list = tuple((utils.PartitionedTensor(grads),) - for grads in grads_list) - elif fisher_factors.TOWER_STRATEGY == "separate": - inputs = tuple(inputs) - grads_list = tuple(grads_list) - - else: - raise ValueError("Global config variable TOWER_STRATEGY must be one of " - "'concat' or 'separate'.") - - return inputs, grads_list - - def tensors_to_compute_grads(self): - """Tensors to compute derivative of loss with respect to.""" - return tuple(self._outputs) - - def register_additional_tower(self, inputs, outputs): - self._inputs.append(inputs) - self._outputs.append(outputs) - - @property - def num_registered_towers(self): - result = len(self._inputs) - assert result == len(self._outputs) - return result - - @property - def _inputs(self): - return self.__inputs - - @property - def _outputs(self): - return self.__outputs - - -class 
FullyConnectedDiagonalFB(InputOutputMultiTower, DiagonalFB): - """FisherBlock for fully-connected (dense) layers using a diagonal approx. - - Estimates the Fisher Information matrix's diagonal entries for a fully - connected layer. Unlike NaiveDiagonalFB this uses the low-variance "sum of - squares" estimator. - - Let 'params' be a vector parameterizing a model and 'i' an arbitrary index - into it. We are interested in Fisher(params)[i, i]. This is, - - $$Fisher(params)[i, i] = E[ v(x, y, params) v(x, y, params)^T ][i, i] - = E[ v(x, y, params)[i] ^ 2 ]$$ - - Consider fully connected layer in this model with (unshared) weight matrix - 'w'. For an example 'x' that produces layer inputs 'a' and output - preactivations 's', - - $$v(x, y, w) = vec( a (d loss / d s)^T )$$ - - This FisherBlock tracks Fisher(params)[i, i] for all indices 'i' corresponding - to the layer's parameters 'w'. - """ - - def __init__(self, layer_collection, has_bias=False): - """Creates a FullyConnectedDiagonalFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - has_bias: Whether the component Kronecker factors have an additive bias. - (Default: False) - """ - self._has_bias = has_bias - - super(FullyConnectedDiagonalFB, self).__init__(layer_collection) - - def instantiate_factors(self, grads_list, damping): - inputs, grads_list = self._process_data(grads_list) - - self._factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedDiagonalFactor, - (inputs, grads_list, self._has_bias)) - - self._damping_func = _package_func(lambda: damping, (damping,)) - - -class ConvDiagonalFB(InputOutputMultiTower, DiagonalFB): - """FisherBlock for 2-D convolutional layers using a diagonal approx. - - Estimates the Fisher Information matrix's diagonal entries for a convolutional - layer. Unlike NaiveDiagonalFB this uses the low-variance "sum of squares" - estimator. 
- - Let 'params' be a vector parameterizing a model and 'i' an arbitrary index - into it. We are interested in Fisher(params)[i, i]. This is, - - $$Fisher(params)[i, i] = E[ v(x, y, params) v(x, y, params)^T ][i, i] - = E[ v(x, y, params)[i] ^ 2 ]$$ - - Consider a convoluational layer in this model with (unshared) filter matrix - 'w'. For an example image 'x' that produces layer inputs 'a' and output - preactivations 's', - - $$v(x, y, w) = vec( sum_{loc} a_{loc} (d loss / d s_{loc})^T )$$ - - where 'loc' is a single (x, y) location in an image. - - This FisherBlock tracks Fisher(params)[i, i] for all indices 'i' corresponding - to the layer's parameters 'w'. - """ - - def __init__(self, - layer_collection, - params, - strides, - padding, - data_format=None, - dilations=None): - """Creates a ConvDiagonalFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - params: The parameters (Tensor or tuple of Tensors) of this layer. If - kernel alone, a Tensor of shape [kernel_height, kernel_width, - in_channels, out_channels]. If kernel and bias, a tuple of 2 elements - containing the previous and a Tensor of shape [out_channels]. - strides: The stride size in this layer (1-D Tensor of length 4). - padding: The padding in this layer (e.g. "SAME"). - data_format: str or None. Format of input data. - dilations: List of 4 ints or None. Rate for dilation along all dimensions. - - Raises: - ValueError: if strides is not length-4. - ValueError: if dilations is not length-4. - ValueError: if channel is not last dimension. 
- """ - if len(strides) != 4: - raise ValueError("strides must contain 4 numbers.") - - if dilations is None: - dilations = [1, 1, 1, 1] - - if len(dilations) != 4: - raise ValueError("dilations must contain 4 numbers.") - - if not utils.is_data_format_channel_last(data_format): - raise ValueError("data_format must be channels-last.") - - self._strides = maybe_tuple(strides) - self._padding = padding - self._data_format = data_format - self._dilations = maybe_tuple(dilations) - self._has_bias = isinstance(params, (tuple, list)) - - fltr = params[0] if self._has_bias else params - self._filter_shape = tuple(fltr.shape.as_list()) - - if len(self._filter_shape) != 4: - raise ValueError( - "Convolution filter must be of shape" - " [filter_height, filter_width, in_channels, out_channels].") - - super(ConvDiagonalFB, self).__init__(layer_collection) - - def instantiate_factors(self, grads_list, damping): - inputs, grads_list = self._process_data(grads_list) - - # Infer number of locations upon which convolution is applied. - self._num_locations = num_conv_locations(inputs[0].shape.as_list(), - self._strides) - - self._factor = self._layer_collection.make_or_get_factor( - fisher_factors.ConvDiagonalFactor, - (inputs, grads_list, self._filter_shape, self._strides, self._padding, - self._data_format, self._dilations, self._has_bias)) - - def damping_func(): - return self._num_locations * normalize_damping(damping, - self._num_locations) - - damping_id = (self._num_locations, "mult", "normalize_damping", damping, - self._num_locations) - self._damping_func = _package_func(damping_func, damping_id) - - -class KroneckerProductFB(FisherBlock): - """A base class for blocks with separate input and output Kronecker factors. - - The Fisher block is approximated as a Kronecker product of the input and - output factors. 
- """ - - def _setup_damping(self, damping, normalization=None): - """Makes functions that compute the damping values for both factors.""" - def compute_damping(): - if normalization is not None: - maybe_normalized_damping = normalize_damping(damping, normalization) - else: - maybe_normalized_damping = damping - - return compute_pi_adjusted_damping( - self._input_factor.get_cov_as_linear_operator(), - self._output_factor.get_cov_as_linear_operator(), - maybe_normalized_damping**0.5) - - if normalization is not None: - damping_id = ("compute_pi_adjusted_damping", - "cov", self._input_factor.name, - "cov", self._output_factor.name, - "normalize_damping", damping, normalization, "power", 0.5) - else: - damping_id = ("compute_pi_adjusted_damping", - "cov", self._input_factor.name, - "cov", self._output_factor.name, - damping, "power", 0.5) - - self._input_damping_func = _package_func(lambda: compute_damping()[0], - damping_id + ("ref", 0)) - self._output_damping_func = _package_func(lambda: compute_damping()[1], - damping_id + ("ref", 1)) - - def register_matpower(self, exp): - self._input_factor.register_matpower(exp, self._input_damping_func) - self._output_factor.register_matpower(exp, self._output_damping_func) - - def register_cholesky(self): - self._input_factor.register_cholesky(self._input_damping_func) - self._output_factor.register_cholesky(self._output_damping_func) - - def register_cholesky_inverse(self): - self._input_factor.register_cholesky_inverse(self._input_damping_func) - self._output_factor.register_cholesky_inverse(self._output_damping_func) - - @property - def _renorm_coeff(self): - """Kronecker factor multiplier coefficient. - - If this FisherBlock is represented as 'FB = c * kron(left, right)', then - this is 'c'. - - Returns: - 0-D Tensor. 
- """ - return 1.0 - - def _multiply_factored_matrix(self, left_factor, right_factor, vector, - extra_scale=1.0, transpose_left=False, - transpose_right=False): - reshaped_vector = utils.layer_params_to_mat2d(vector) - reshaped_out = right_factor.matmul_right(reshaped_vector, - adjoint=transpose_right) - reshaped_out = left_factor.matmul(reshaped_out, - adjoint=transpose_left) - if extra_scale != 1.0: - reshaped_out *= math_ops.cast(extra_scale, dtype=reshaped_out.dtype) - return utils.mat2d_to_layer_params(vector, reshaped_out) - - def multiply_matpower(self, vector, exp): - left_factor = self._input_factor.get_matpower( - exp, self._input_damping_func) - right_factor = self._output_factor.get_matpower( - exp, self._output_damping_func) - extra_scale = float(self._renorm_coeff)**exp - return self._multiply_factored_matrix(left_factor, right_factor, vector, - extra_scale=extra_scale) - - def multiply_cholesky(self, vector, transpose=False): - left_factor = self._input_factor.get_cholesky(self._input_damping_func) - right_factor = self._output_factor.get_cholesky(self._output_damping_func) - extra_scale = float(self._renorm_coeff)**0.5 - return self._multiply_factored_matrix(left_factor, right_factor, vector, - extra_scale=extra_scale, - transpose_left=transpose, - transpose_right=not transpose) - - def multiply_cholesky_inverse(self, vector, transpose=False): - left_factor = self._input_factor.get_cholesky_inverse( - self._input_damping_func) - right_factor = self._output_factor.get_cholesky_inverse( - self._output_damping_func) - extra_scale = float(self._renorm_coeff)**-0.5 - return self._multiply_factored_matrix(left_factor, right_factor, vector, - extra_scale=extra_scale, - transpose_left=transpose, - transpose_right=not transpose) - - def full_fisher_block(self): - """Explicitly constructs the full Fisher block. - - Used for testing purposes. (In general, the result may be very large.) - - Returns: - The full Fisher block. 
- """ - left_factor = self._input_factor.get_cov_as_linear_operator().to_dense() - right_factor = self._output_factor.get_cov_as_linear_operator().to_dense() - return self._renorm_coeff * utils.kronecker_product(left_factor, - right_factor) - - -class EmbeddingKFACFB(InputOutputMultiTower, KroneckerProductFB): - """K-FAC FisherBlock for embedding layers. - - This FisherBlock is similar to FullyConnectedKFACBasicFB, except that its - input factor is approximated by a diagonal matrix. In the case that each - example references exactly one embedding, this approximation is exact. - - Does not support bias parameters. - """ - - def __init__(self, layer_collection, vocab_size): - """Creates a EmbeddingKFACFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - vocab_size: int. Size of vocabulary for this embedding layer. - """ - self._vocab_size = vocab_size - - super(EmbeddingKFACFB, self).__init__(layer_collection) - - def instantiate_factors(self, grads_list, damping): - """Instantiate Kronecker Factors for this FisherBlock. - - Args: - grads_list: List of list of Tensors. grads_list[i][j] is the - gradient of the loss with respect to 'outputs' from source 'i' and - tower 'j'. Each Tensor has shape [tower_minibatch_size, output_size]. - damping: 0-D Tensor or float. 'damping' * identity is approximately added - to this FisherBlock's Fisher approximation. - """ - inputs, grads_list = self._process_data(grads_list) - - self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.EmbeddingInputKroneckerFactor, - (inputs, self._vocab_size)) - self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedKroneckerFactor, (grads_list,)) - self._setup_damping(damping) - - -class FullyConnectedKFACBasicFB(InputOutputMultiTower, KroneckerProductFB): - """K-FAC FisherBlock for fully-connected (dense) layers. 
- - This uses the Kronecker-factorized approximation from the original - K-FAC paper (https://arxiv.org/abs/1503.05671) - """ - - def __init__(self, layer_collection, has_bias=False): - """Creates a FullyConnectedKFACBasicFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - has_bias: Whether the component Kronecker factors have an additive bias. - (Default: False) - """ - self._has_bias = has_bias - - super(FullyConnectedKFACBasicFB, self).__init__(layer_collection) - - def instantiate_factors(self, grads_list, damping): - """Instantiate Kronecker Factors for this FisherBlock. - - Args: - grads_list: List of list of Tensors. grads_list[i][j] is the - gradient of the loss with respect to 'outputs' from source 'i' and - tower 'j'. Each Tensor has shape [tower_minibatch_size, output_size]. - damping: 0-D Tensor or float. 'damping' * identity is approximately added - to this FisherBlock's Fisher approximation. - """ - inputs, grads_list = self._process_data(grads_list) - - self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedKroneckerFactor, - ((inputs,), self._has_bias)) - self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedKroneckerFactor, - (grads_list,)) - self._setup_damping(damping) - - -class ConvKFCBasicFB(InputOutputMultiTower, KroneckerProductFB): - r"""FisherBlock for convolutional layers using the basic KFC approx. - - Estimates the Fisher Information matrix's blog for a convolutional - layer. - - Consider a convolutional layer in this model with (unshared) filter matrix - 'w'. 
For a minibatch that produces inputs 'a' and output preactivations 's', - this FisherBlock estimates, - - $$F(w) = \#locations * kronecker(E[flat(a) flat(a)^T], - E[flat(ds) flat(ds)^T])$$ - - where - - $$ds = (d / ds) log p(y | x, w)$$ - #locations = number of (x, y) locations where 'w' is applied. - - where the expectation is taken over all examples and locations and flat() - concatenates an array's leading dimensions. - - See equation 23 in https://arxiv.org/abs/1602.01407 for details. - """ - - def __init__(self, - layer_collection, - params, - padding, - strides=None, - dilation_rate=None, - data_format=None, - extract_patches_fn=None): - """Creates a ConvKFCBasicFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - params: The parameters (Tensor or tuple of Tensors) of this layer. If - kernel alone, a Tensor of shape [..spatial_filter_shape.., - in_channels, out_channels]. If kernel and bias, a tuple of 2 elements - containing the previous and a Tensor of shape [out_channels]. - padding: str. Padding method. - strides: List of ints or None. Contains [..spatial_filter_strides..] if - 'extract_patches_fn' is compatible with tf.nn.convolution(), else - [1, ..spatial_filter_strides, 1]. - dilation_rate: List of ints or None. Rate for dilation along each spatial - dimension if 'extract_patches_fn' is compatible with - tf.nn.convolution(), else [1, ..spatial_dilation_rates.., 1]. - data_format: str or None. Format of input data. - extract_patches_fn: str or None. Name of function that extracts image - patches. One of "extract_convolution_patches", "extract_image_patches", - "extract_pointwise_conv2d_patches". 
- """ - self._padding = padding - self._strides = maybe_tuple(strides) - self._dilation_rate = maybe_tuple(dilation_rate) - self._data_format = data_format - self._extract_patches_fn = extract_patches_fn - self._has_bias = isinstance(params, (tuple, list)) - - fltr = params[0] if self._has_bias else params - self._filter_shape = tuple(fltr.shape.as_list()) - - super(ConvKFCBasicFB, self).__init__(layer_collection) - - def instantiate_factors(self, grads_list, damping): - inputs, grads_list = self._process_data(grads_list) - - # Infer number of locations upon which convolution is applied. - self._num_locations = num_conv_locations(inputs[0].shape.as_list(), - self._strides) - - self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.ConvInputKroneckerFactor, - (inputs, self._filter_shape, self._padding, self._strides, - self._dilation_rate, self._data_format, self._extract_patches_fn, - self._has_bias)) - self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.ConvOutputKroneckerFactor, (grads_list,)) - - self._setup_damping(damping, normalization=self._num_locations) - - @property - def _renorm_coeff(self): - return self._num_locations - - -class DepthwiseConvDiagonalFB(ConvDiagonalFB): - """FisherBlock for depthwise_conv2d(). - - Equivalent to ConvDiagonalFB applied to each input channel in isolation. - """ - - def __init__(self, - layer_collection, - params, - strides, - padding, - rate=None, - data_format=None): - """Creates a DepthwiseConvKFCBasicFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - params: Tensor of shape [filter_height, filter_width, in_channels, - channel_multiplier]. - strides: List of 4 ints. Strides along all dimensions. - padding: str. Padding method. - rate: List of 4 ints or None. Rate for dilation along all dimensions. - data_format: str or None. Format of input data. 
- - Raises: - NotImplementedError: If parameters contains bias. - ValueError: If filter is not 4-D. - ValueError: If strides is not length-4. - ValueError: If rates is not length-2. - ValueError: If channels are not last dimension. - """ - if isinstance(params, (tuple, list)): - raise NotImplementedError("Bias not yet supported.") - - if params.shape.ndims != 4: - raise ValueError("Filter must be 4-D.") - - if len(strides) != 4: - raise ValueError("strides must account for 4 dimensions.") - - if rate is not None: - if len(rate) != 2: - raise ValueError("rate must only account for spatial dimensions.") - rate = [1, rate[0], rate[1], 1] # conv2d expects 4-element rate. - - if not utils.is_data_format_channel_last(data_format): - raise ValueError("data_format must be channels-last.") - - super(DepthwiseConvDiagonalFB, self).__init__( - layer_collection=layer_collection, - params=params, - strides=strides, - padding=padding, - dilations=rate, - data_format=data_format) - - # This is a hack to overwrite the same setting in ConvKFCBasicFB.__init__(). - filter_height, filter_width, in_channels, channel_multiplier = ( - params.shape.as_list()) - self._filter_shape = (filter_height, filter_width, in_channels, - in_channels * channel_multiplier) - - def _multiply_matrix(self, matrix, vector): - conv2d_vector = depthwise_conv2d_filter_to_conv2d_filter(vector) - conv2d_result = super( - DepthwiseConvDiagonalFB, self)._multiply_matrix(matrix, conv2d_vector) - return conv2d_filter_to_depthwise_conv2d_filter(conv2d_result) - - -class DepthwiseConvKFCBasicFB(ConvKFCBasicFB): - """FisherBlock for depthwise_conv2d(). - - Equivalent to ConvKFCBasicFB applied to each input channel in isolation. - """ - - def __init__(self, - layer_collection, - params, - strides, - padding, - rate=None, - data_format=None): - """Creates a DepthwiseConvKFCBasicFB block. 
- - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - params: Tensor of shape [filter_height, filter_width, in_channels, - channel_multiplier]. - strides: List of 4 ints. Strides along all dimensions. - padding: str. Padding method. - rate: List of 4 ints or None. Rate for dilation along all dimensions. - data_format: str or None. Format of input data. - - Raises: - NotImplementedError: If parameters contains bias. - ValueError: If filter is not 4-D. - ValueError: If strides is not length-4. - ValueError: If rates is not length-2. - ValueError: If channels are not last dimension. - """ - if isinstance(params, (tuple, list)): - raise NotImplementedError("Bias not yet supported.") - - if params.shape.ndims != 4: - raise ValueError("Filter must be 4-D.") - - if len(strides) != 4: - raise ValueError("strides must account for 4 dimensions.") - - if rate is not None: - if len(rate) != 2: - raise ValueError("rate must only account for spatial dimensions.") - rate = [1, rate[0], rate[1], 1] # conv2d expects 4-element rate. - - if not utils.is_data_format_channel_last(data_format): - raise ValueError("data_format must be channels-last.") - - super(DepthwiseConvKFCBasicFB, self).__init__( - layer_collection=layer_collection, - params=params, - padding=padding, - strides=strides, - dilation_rate=rate, - data_format=data_format, - extract_patches_fn="extract_image_patches") - - # This is a hack to overwrite the same setting in ConvKFCBasicFB.__init__(). 
- filter_height, filter_width, in_channels, channel_multiplier = ( - params.shape.as_list()) - self._filter_shape = (filter_height, filter_width, in_channels, - in_channels * channel_multiplier) - - def _multiply_factored_matrix(self, left_factor, right_factor, vector, - extra_scale=1.0, transpose_left=False, - transpose_right=False): - conv2d_vector = depthwise_conv2d_filter_to_conv2d_filter(vector) - conv2d_result = super( - DepthwiseConvKFCBasicFB, self)._multiply_factored_matrix( - left_factor, right_factor, conv2d_vector, extra_scale=extra_scale, - transpose_left=transpose_left, transpose_right=transpose_right) - return conv2d_filter_to_depthwise_conv2d_filter(conv2d_result) - - -def depthwise_conv2d_filter_to_conv2d_filter(filter, name=None): # pylint: disable=redefined-builtin - """Converts a convolution filter for use with conv2d. - - Transforms a filter for use with tf.nn.depthwise_conv2d() to one that's - compatible with tf.nn.conv2d(). - - Args: - filter: Tensor of shape [height, width, in_channels, channel_multiplier]. - name: None or str. Name of Op. - - Returns: - Tensor of shape [height, width, in_channels, out_channels]. - - """ - with ops.name_scope(name, "depthwise_conv2d_filter_to_conv2d_filter", - [filter]): - filter = ops.convert_to_tensor(filter) - filter_height, filter_width, in_channels, channel_multiplier = ( - filter.shape.as_list()) - - results = [] - for i in range(in_channels): - # Slice out one in_channel's filter. Insert zeros around it to force it - # to affect that channel and that channel alone. - elements = [] - if i > 0: - elements.append( - array_ops.zeros( - [filter_height, filter_width, i, channel_multiplier])) - elements.append(filter[:, :, i:(i + 1), :]) - if i + 1 < in_channels: - elements.append( - array_ops.zeros([ - filter_height, filter_width, in_channels - (i + 1), - channel_multiplier - ])) - - # Concat along in_channel. 
- results.append( - array_ops.concat(elements, axis=-2, name="in_channel_%d" % i)) - - # Concat along out_channel. - return array_ops.concat(results, axis=-1, name="out_channel") - - -def conv2d_filter_to_depthwise_conv2d_filter(filter, name=None): # pylint: disable=redefined-builtin - """Converts a convolution filter for use with depthwise_conv2d. - - Transforms a filter for use with tf.nn.conv2d() to one that's - compatible with tf.nn.depthwise_conv2d(). Ignores all filters but those along - the diagonal. - - Args: - filter: Tensor of shape [height, width, in_channels, out_channels]. - name: None or str. Name of Op. - - Returns: - Tensor of shape, - [height, width, in_channels, channel_multiplier] - - Raises: - ValueError: if out_channels is not evenly divisible by in_channels. - """ - with ops.name_scope(name, "conv2d_filter_to_depthwise_conv2d_filter", - [filter]): - filter = ops.convert_to_tensor(filter) - filter_height, filter_width, in_channels, out_channels = ( - filter.shape.as_list()) - - if out_channels % in_channels != 0: - raise ValueError("out_channels must be evenly divisible by in_channels.") - channel_multiplier = out_channels // in_channels - - results = [] - filter = array_ops.reshape(filter, [ - filter_height, filter_width, in_channels, in_channels, - channel_multiplier - ]) - for i in range(in_channels): - # Slice out output corresponding to the correct filter. - filter_slice = array_ops.reshape( - filter[:, :, i, i, :], - [filter_height, filter_width, 1, channel_multiplier]) - results.append(filter_slice) - - # Concat along out_channel. - return array_ops.concat(results, axis=-2, name="in_channels") - - -def maybe_tuple(obj): - if not isinstance(obj, list): - return obj - return tuple(obj) - - -def num_conv_locations(input_shape, strides): - """Returns the number of spatial locations a 2D Conv kernel is applied to. - - Args: - input_shape: List of ints representing shape of inputs to - tf.nn.convolution(). 
- strides: List of ints representing strides along spatial dimensions as - passed in to tf.nn.convolution(). - - Returns: - A scalar |T| denoting the number of spatial locations for the Conv layer. - """ - spatial_input_locations = np.prod(input_shape[1:-1]) - - if strides is None: - spatial_strides_divisor = 1 - else: - spatial_strides_divisor = np.prod(strides) - - return spatial_input_locations // spatial_strides_divisor - - -class InputOutputMultiTowerMultiUse(InputOutputMultiTower): - """Adds methods for multi-use/time-step case to InputOutputMultiTower.""" - - def __init__(self, num_uses=None, *args, **kwargs): - self._num_uses = num_uses - super(InputOutputMultiTowerMultiUse, self).__init__(*args, **kwargs) - - def _process_data(self, grads_list): - """Process temporal/multi-use data into the format used by the factors. - - This function takes inputs and grads_lists data and processes it into - one of the formats expected by the FisherFactor classes (depending on - the value of the global configuration variable TOWER_STRATEGY). - - It accepts the data in one of two initial formats. The first possible - format is where self._inputs is a list of list of Tensors. The first index - is tower, the second is use/time-step. grads_list, meanwhile, is a list - over sources of such lists of lists. - - The second possible data format is where self._inputs is a Tensor with - uses/times-steps folded into the batch dimension. i.e. it is a Tensor - of shape [num_uses * size_batch, ...] which represents a reshape of a - Tensor of shape [num_uses, size_batch, ...]. And similarly grads_list is - a list over sources of such Tensors. - - There are two possible formats which inputs and grads_list are transformed - into. - - If TOWER_STRATEGY is "concat", 'inputs' becomes a tuple containing - a single tensor (represented as a PartitionedTensor object) with all of - the data from the towers, as well as the uses/time-steps, concatenated - together. 
In this tensor the leading dimension is the batch and - use/time-step dimensions folded together (with 'use' being the major of - these two, so that the tensors can be thought of as reshapes of ones of - shape [num_uses, batch_size, ...]). grads_list is similarly formatted as a - tuple over sources of such tensors. - - If TOWER_STRATEGY is "separate" the inputs are formatted into lists of - tensors over towers. Each of these tensors has a similar format to - the tensor produced by the "concat" option, except that each contains - only the data from a single tower. grads_list is similarly formatted - into a tuple over sources of such tuples. - - Args: - grads_list: grads_list in its initial format (see above). - - Returns: - inputs: self._inputs transformed into the appropriate format (see - above). - grads_list: grads_list transformed into the appropriate format (see - above). - - Raises: - ValueError: If TOWER_STRATEGY is not one of "separate" or "concat". - ValueError: If the given/initial format of self._inputs and grads_list - isn't recognized, or doesn't agree with self._num_uses. - """ - - inputs = self._inputs - - if isinstance(inputs[0], (list, tuple)): - num_uses = len(inputs[0]) - if self._num_uses is not None and self._num_uses != num_uses: - raise ValueError("num_uses argument doesn't match length of inputs.") - else: - self._num_uses = num_uses - - # Check that all mini-batches/towers have the same number of uses - if not all(len(input_) == num_uses for input_ in inputs): - raise ValueError("Length of inputs argument is inconsistent across " - "towers.") - - if fisher_factors.TOWER_STRATEGY == "concat": - # Reverse the tower and use/time-step indices, so that use is now first, - # and towers is second - inputs = tuple(zip(*inputs)) - - # Flatten the two dimensions - inputs = nest.flatten(inputs) - - # Merge everything together into a PartitionedTensor. 
We package it in - # a singleton tuple since the factors will expect a list over towers - inputs = (utils.PartitionedTensor(inputs),) - - elif fisher_factors.TOWER_STRATEGY == "separate": - # Merge together the uses/time-step dimension into PartitionedTensors, - # but keep the leading dimension (towers) intact for the factors to - # process individually. - inputs = tuple(utils.PartitionedTensor(input_) for input_ in inputs) - - else: - raise ValueError("Global config variable TOWER_STRATEGY must be one of " - "'concat' or 'separate'.") - else: - inputs = tuple(inputs) - - # Now we perform the analogous processing for grads_list - if isinstance(grads_list[0][0], (list, tuple)): - num_uses = len(grads_list[0][0]) - if self._num_uses is not None and self._num_uses != num_uses: - raise ValueError("num_uses argument doesn't match length of outputs, " - "or length of outputs is inconsistent with length of " - "inputs.") - else: - self._num_uses = num_uses - - if not all(len(grad) == num_uses for grads in grads_list - for grad in grads): - raise ValueError("Length of outputs argument is inconsistent across " - "towers.") - - if fisher_factors.TOWER_STRATEGY == "concat": - # Reverse the tower and use/time-step indices, so that use is now first, - # and towers is second - grads_list = tuple(tuple(zip(*grads)) for grads in grads_list) - - # Flatten the two dimensions, leaving the leading dimension (source) - # intact - grads_list = tuple(nest.flatten(grads) for grads in grads_list) - - # Merge inner dimensions together into PartitionedTensors. We package - # them in a singleton tuple since the factors will expect a list over - # towers - grads_list = tuple((utils.PartitionedTensor(grads),) - for grads in grads_list) - - elif fisher_factors.TOWER_STRATEGY == "separate": - # Merge together the uses/time-step dimension into PartitionedTensors, - # but keep the leading dimension (towers) intact for the factors to - # process individually. 
- grads_list = tuple(tuple(utils.PartitionedTensor(grad) - for grad in grads) - for grads in grads_list) - - else: - raise ValueError("Global config variable TOWER_STRATEGY must be one of " - "'concat' or 'separate'.") - else: - grads_list = tuple(tuple(grads) for grads in grads_list) - - if self._num_uses is None: - raise ValueError("You must supply a value for the num_uses argument if " - "the number of uses cannot be inferred from inputs or " - "outputs arguments (e.g. if they are both given in the " - "single Tensor format, instead of as lists of Tensors.") - - return inputs, grads_list - - -class FullyConnectedMultiIndepFB(InputOutputMultiTowerMultiUse, - KroneckerProductFB): - """FisherBlock for fully-connected layers that share parameters. - - This class implements the "independence across time" approximation from the - following paper: - https://openreview.net/pdf?id=HyMTkQZAb - """ - - def __init__(self, layer_collection, has_bias=False, num_uses=None): - """Creates a FullyConnectedMultiIndepFB block. - - Args: - layer_collection: LayerCollection instance. - has_bias: bool. If True, estimates Fisher with respect to a bias - parameter as well as the layer's parameters. - num_uses: int or None. Number of uses of the layer in the model's graph. - Only required if the data is formatted with uses/time folded into the - batch dimension (instead of uses/time being a list dimension). 
- (Default: None) - """ - self._has_bias = has_bias - - super(FullyConnectedMultiIndepFB, self).__init__( - layer_collection=layer_collection, - num_uses=num_uses) - - def instantiate_factors(self, grads_list, damping): - inputs, grads_list = self._process_data(grads_list) - - self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, - ((inputs,), self._num_uses, self._has_bias)) - - self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, (grads_list, self._num_uses)) - - self._setup_damping(damping, normalization=self._num_uses) - - @property - def _renorm_coeff(self): - return float(self._num_uses) - - -class ConvKFCBasicMultiIndepFB(InputOutputMultiTowerMultiUse, - KroneckerProductFB): - """FisherBlock for 2D convolutional layers using the basic KFC approx. - - Similar to ConvKFCBasicFB except that this version supports multiple - uses/time-steps via a standard independence approximation. Similar to the - "independence across time" used in FullyConnectedMultiIndepFB but generalized - in the obvious way to conv layers. - """ - - def __init__(self, - layer_collection, - params, - padding, - strides=None, - dilation_rate=None, - data_format=None, - extract_patches_fn=None, - num_uses=None): - """Creates a ConvKFCBasicMultiIndepFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - params: The parameters (Tensor or tuple of Tensors) of this layer. If - kernel alone, a Tensor of shape [..spatial_filter_shape.., - in_channels, out_channels]. If kernel and bias, a tuple of 2 elements - containing the previous and a Tensor of shape [out_channels]. - padding: str. Padding method. - strides: List of ints or None. Contains [..spatial_filter_strides..] if - 'extract_patches_fn' is compatible with tf.nn.convolution(), else - [1, ..spatial_filter_strides, 1]. 
- dilation_rate: List of ints or None. Rate for dilation along each spatial - dimension if 'extract_patches_fn' is compatible with - tf.nn.convolution(), else [1, ..spatial_dilation_rates.., 1]. - data_format: str or None. Format of input data. - extract_patches_fn: str or None. Name of function that extracts image - patches. One of "extract_convolution_patches", "extract_image_patches", - "extract_pointwise_conv2d_patches". - num_uses: int or None. Number of uses of the layer in the model's graph. - Only required if the data is formatted with uses/time folded into the - batch dimension (instead of uses/time being a list dimension). - (Default: None) - """ - self._padding = padding - self._strides = maybe_tuple(strides) - self._dilation_rate = maybe_tuple(dilation_rate) - self._data_format = data_format - self._extract_patches_fn = extract_patches_fn - self._has_bias = isinstance(params, (tuple, list)) - - fltr = params[0] if self._has_bias else params - self._filter_shape = tuple(fltr.shape.as_list()) - - super(ConvKFCBasicMultiIndepFB, self).__init__( - layer_collection=layer_collection, - num_uses=num_uses) - - def instantiate_factors(self, grads_list, damping): - inputs, grads_list = self._process_data(grads_list) - - # Infer number of locations upon which convolution is applied. 
- self._num_locations = num_conv_locations(inputs[0].shape.as_list(), - self._strides) - - self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.ConvInputKroneckerFactor, - (inputs, self._filter_shape, self._padding, self._strides, - self._dilation_rate, self._data_format, self._extract_patches_fn, - self._has_bias)) - self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.ConvOutputKroneckerFactor, (grads_list,)) - - self._setup_damping(damping, normalization= - (self._num_locations * self._num_uses)) - - @property - def _renorm_coeff(self): - return self._num_locations * self._num_uses - - -class EmbeddingKFACMultiIndepFB(InputOutputMultiTowerMultiUse, - KroneckerProductFB): - """K-FAC FisherBlock for embedding layers used multiple times in the graph. - - Similar to EmbeddingKFACFB except that this version supports multiple uses - of the parameter within a single model. These uses could correspond to time - steps in an RNN architecture, but they don't have to. - - Does not support bias parameters. - """ - - def __init__(self, layer_collection, vocab_size, num_uses=None): - """Creates a EmbeddingKFACMultiIndepFB block. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - vocab_size: int. Size of vocabulary for this embedding layer. - num_uses: int or None. Number of uses of the layer in the model's graph. - Only required if the data is formatted with time folded into the batch - dimension (instead of time being a list dimension). (Default: None) - """ - self._vocab_size = vocab_size - - super(EmbeddingKFACMultiIndepFB, self).__init__( - layer_collection=layer_collection, - num_uses=num_uses) - - def instantiate_factors(self, grads_list, damping): - """Instantiate Kronecker Factors for this FisherBlock. - - Args: - grads_list: List of list of list of Tensors. 
grads_list[i][j][k] is the - gradient of the loss with respect to 'outputs' from source 'i', - tower/mini-batch 'j', and use/time-step 'k'. Each Tensor has shape - [tower_minibatch_size, output_size]. - damping: 0-D Tensor or float. 'damping' * identity is approximately added - to this FisherBlock's Fisher approximation. - """ - inputs, grads_list = self._process_data(grads_list) - - self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.EmbeddingInputKroneckerFactor, - (inputs, self._vocab_size)) - self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, (grads_list, self._num_uses)) - self._setup_damping(damping, normalization=self._num_uses) - - @property - def _renorm_coeff(self): - return float(self._num_uses) - - -class SeriesFBApproximation(enum.IntEnum): - """See FullyConnectedSeriesFB.__init__ for description and usage.""" - option1 = 1 - option2 = 2 - - -class FullyConnectedSeriesFB(InputOutputMultiTowerMultiUse, - KroneckerProductFB): - """FisherBlock for fully-connected layers that share parameters across time. - - This class implements the "Option 1" and "Option 2" approximation from the - following paper: - https://openreview.net/pdf?id=HyMTkQZAb - - See the end of the appendix of the paper for a pseudo-code of the - algorithm being implemented by multiply_matpower here. Note that we are - using pre-computed versions of certain matrix-matrix products to speed - things up. This is explicitly explained wherever it is done. - """ - - def __init__(self, - layer_collection, - has_bias=False, - num_uses=None, - option=SeriesFBApproximation.option2): - """Constructs a new `FullyConnectedSeriesFB`. - - Args: - layer_collection: The collection of all layers in the K-FAC approximate - Fisher information matrix to which this FisherBlock belongs. - has_bias: Whether the layer includes a bias parameter. - num_uses: int or None. Number of time-steps over which the layer - is used. 
Only required if the data is formatted with time folded into - the batch dimension (instead of time being a list dimension). - (Default: None) - option: A `SeriesFBApproximation` specifying the simplifying assumption - to be used in this block. `option1` approximates the cross-covariance - over time as a symmetric matrix, while `option2` makes - the assumption that training sequences are infinitely long. See section - 3.5 of the paper for more details. - """ - - self._has_bias = has_bias - self._option = option - - super(FullyConnectedSeriesFB, self).__init__( - layer_collection=layer_collection, - num_uses=num_uses) - - @property - def _num_timesteps(self): - return self._num_uses - - @property - def _renorm_coeff(self): - # This should no longer be used since the multiply_X functions from the base - # class have been overridden - assert False - - def instantiate_factors(self, grads_list, damping): - inputs, grads_list = self._process_data(grads_list) - - self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, - ((inputs,), self._num_uses, self._has_bias)) - self._input_factor.register_cov_dt1() - - self._output_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, (grads_list, self._num_uses)) - self._output_factor.register_cov_dt1() - - self._setup_damping(damping, normalization=self._num_uses) - - def register_matpower(self, exp): - if exp != -1: - raise NotImplementedError("FullyConnectedSeriesFB only supports inverse" - "multiplications.") - - if self._option == SeriesFBApproximation.option1: - self._input_factor.register_option1quants(self._input_damping_func) - self._output_factor.register_option1quants(self._output_damping_func) - elif self._option == SeriesFBApproximation.option2: - self._input_factor.register_option2quants(self._input_damping_func) - self._output_factor.register_option2quants(self._output_damping_func) - else: - raise ValueError( - "Unrecognized 
FullyConnectedSeriesFB approximation: {}".format( - self._option)) - - def multiply_matpower(self, vector, exp): - if exp != -1: - raise NotImplementedError("FullyConnectedSeriesFB only supports inverse" - "multiplications.") - - # pylint: disable=invalid-name - - Z = utils.layer_params_to_mat2d(vector) - - # Derivations were done for "batch_dim==1" case so we need to convert to - # that orientation: - Z = array_ops.transpose(Z) - - if self._option == SeriesFBApproximation.option1: - - # Note that \\(L_A = A0^{-1/2} * U_A and L_G = G0^{-1/2} * U_G.\\) - L_A, psi_A = self._input_factor.get_option1quants( - self._input_damping_func) - L_G, psi_G = self._output_factor.get_option1quants( - self._output_damping_func) - - def gamma(x): - # We are assuming that each case has the same number of time-steps. - # If this stops being the case one shouldn't simply replace this T - # with its average value. Instead, one needs to go back to the - # definition of the gamma function from the paper. - T = self._num_timesteps - return (1 - x)**2 / (T * (1 - x**2) - 2 * x * (1 - x**T)) - - # \\(Y = \gamma( psi_G*psi_A^T )\\) (computed element-wise) - # Even though Y is Z-independent we are recomputing it from the psi's - # each since Y depends on both A and G quantities, and it is relatively - # cheap to compute. 
- Y = gamma(array_ops.reshape(psi_G, [int(psi_G.shape[0]), -1]) * psi_A) - - # \\(Z = L_G^T * Z * L_A\\) - # This is equivalent to the following computation from the original - # pseudo-code: - # \\(Z = G0^{-1/2} * Z * A0^{-1/2}\\) - # \\(Z = U_G^T * Z * U_A\\) - Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A), transpose_a=True) - - # \\(Z = Z .* Y\\) - Z *= Y - - # \\(Z = L_G * Z * L_A^T\\) - # This is equivalent to the following computation from the original - # pseudo-code: - # \\(Z = U_G * Z * U_A^T\\) - # \\(Z = G0^{-1/2} * Z * A0^{-1/2}\\) - Z = math_ops.matmul(L_G, math_ops.matmul(Z, L_A, transpose_b=True)) - - elif self._option == SeriesFBApproximation.option2: - - # Note that \\(P_A = A_1^T * A_0^{-1} and P_G = G_1^T * G_0^{-1}\\), - # and \\(K_A = A_0^{-1/2} * E_A\ and\ K_G = G_0^{-1/2} * E_G.\\) - P_A, K_A, mu_A = self._input_factor.get_option2quants( - self._input_damping_func) - P_G, K_G, mu_G = self._output_factor.get_option2quants( - self._output_damping_func) - - # Our approach differs superficially from the pseudo-code in the paper - # in order to reduce the total number of matrix-matrix multiplies. 
- # In particular, the first three computations in the pseudo code are - # \\(Z = G0^{-1/2} * Z * A0^{-1/2}\\) - # \\(Z = Z - hPsi_G^T * Z * hPsi_A\\) - # \\(Z = E_G^T * Z * E_A\\) - # Noting that hPsi = C0^{-1/2} * C1 * C0^{-1/2}\\), so that - # \\(C0^{-1/2} * hPsi = C0^{-1} * C1 * C0^{-1/2} = P^T * C0^{-1/2}\\) - # the entire computation can be written as - # \\(Z = E_G^T * (G0^{-1/2} * Z * A0^{-1/2}\\) - # \\( - hPsi_G^T * G0^{-1/2} * Z * A0^{-1/2} * hPsi_A) * E_A\\) - # \\( = E_G^T * (G0^{-1/2} * Z * A0^{-1/2}\\) - # \\( - G0^{-1/2} * P_G * Z * P_A^T * A0^{-1/2}) * E_A\\) - # \\( = E_G^T * G0^{-1/2} * Z * A0^{-1/2} * E_A\\) - # \\( - E_G^T* G0^{-1/2} * P_G * Z * P_A^T * A0^{-1/2} * E_A\\) - # \\( = K_G^T * Z * K_A - K_G^T * P_G * Z * P_A^T * K_A\\) - # This final expression is computed by the following two lines: - # \\(Z = Z - P_G * Z * P_A^T\\) - Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A, transpose_b=True)) - # \\(Z = K_G^T * Z * K_A\\) - Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A), transpose_a=True) - - # \\(Z = Z ./ (1*1^T - mu_G*mu_A^T)\\) - # Be careful with the outer product. We don't want to accidentally - # make it an inner-product instead. - tmp = 1.0 - array_ops.reshape(mu_G, [int(mu_G.shape[0]), -1]) * mu_A - # Prevent some numerical issues by setting any 0.0 eigs to 1.0 - tmp += 1.0 * math_ops.cast(math_ops.equal(tmp, 0.0), dtype=tmp.dtype) - Z /= tmp - - # We now perform the transpose/reverse version of the operations - # derived above, whose derivation from the original pseudo-code is - # analgous. - # \\(Z = K_G * Z * K_A^T\\) - Z = math_ops.matmul(K_G, math_ops.matmul(Z, K_A, transpose_b=True)) - - # \\(Z = Z - P_G^T * Z * P_A\\) - Z -= math_ops.matmul(P_G, math_ops.matmul(Z, P_A), transpose_a=True) - - # \\(Z = normalize (1/E[T]) * Z\\) - # Note that this normalization is done because we compute the statistics - # by averaging, not summing, over time. 
(And the gradient is presumably - # summed over time, not averaged, and thus their scales are different.) - Z /= math_ops.cast(self._num_timesteps, Z.dtype) - - # Convert back to the "batch_dim==0" orientation. - Z = array_ops.transpose(Z) - - return utils.mat2d_to_layer_params(vector, Z) - - # pylint: enable=invalid-name - - def multiply_cholesky(self, vector): - raise NotImplementedError("FullyConnectedSeriesFB does not support " - "Cholesky computations.") - - def multiply_cholesky_inverse(self, vector): - raise NotImplementedError("FullyConnectedSeriesFB does not support " - "Cholesky computations.") - diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py deleted file mode 100644 index c04cf727fa..0000000000 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""FisherBlock definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.kfac.python.ops.fisher_blocks import * -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import - -_allowed_symbols = [ - 'FisherBlock', - 'FullFB', - 'NaiveDiagonalFB', - 'FullyConnectedDiagonalFB', - 'KroneckerProductFB', - 'EmbeddingKFACFB', - 'FullyConnectedKFACBasicFB', - 'ConvKFCBasicFB', - 'ConvDiagonalFB', - 'set_global_constants', - 'compute_pi_tracenorm', - 'compute_pi_adjusted_damping', - 'num_conv_locations', - 'normalize_damping', - 'LEFT_MULTIPLY', - 'RIGHT_MULTIPLY', -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py deleted file mode 100644 index afa2fd1ca7..0000000000 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ /dev/null @@ -1,1830 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""FisherFactor definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc -import contextlib - -import numpy as np -import six - -from tensorflow.contrib.kfac.python.ops import linear_operator as lo -from tensorflow.contrib.kfac.python.ops import utils -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops as tf_ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import special_math_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.training import moving_averages -from tensorflow.python.util import nest - - -# Whether to initialize covariance estimators at a zero matrix (or the identity -# matrix). -INIT_COVARIANCES_AT_ZERO = True - -# Whether to zero-debias the moving averages. -ZERO_DEBIAS = True - -# Whether to initialize inverse (and other such matrices computed from the cov -# matrices) to the zero matrix (or the identity matrix). -INIT_INVERSES_AT_ZERO = True - -# When the number of inverses requested from a FisherFactor exceeds this value, -# the inverses are computed using an eigenvalue decomposition. -EIGENVALUE_DECOMPOSITION_THRESHOLD = 2 - -# Numerical eigenvalues computed from covariance matrix estimates are clipped to -# be at least as large as this value before they are used to compute inverses or -# matrix powers. Must be nonnegative. -EIGENVALUE_CLIPPING_THRESHOLD = 0.0 - -# Used to subsample the flattened extracted image patches. 
The number of -# outer products per row of the covariance matrix should not exceed this -# value. This parameter is used only if `_SUB_SAMPLE_OUTER_PRODUCTS` is True. -_MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW = 1 - -# Used to subsample the inputs passed to the extract image patches. The batch -# size of number of inputs to extract image patches is multiplied by this -# factor. This parameter is used only if `_SUB_SAMPLE_INPUTS` is True. -_INPUTS_TO_EXTRACT_PATCHES_FACTOR = 0.5 - -# If True, then subsamples the tensor passed to compute the covariance matrix. -_SUB_SAMPLE_OUTER_PRODUCTS = False - -# If True, then subsamples the tensor passed to compute the covariance matrix. -_SUB_SAMPLE_INPUTS = False - -# TOWER_STRATEGY can be one of "concat" or "separate". If "concat", the data -# passed to the factors from the blocks will be concatenated across towers -# (lazily via PartitionedTensor objects). Otherwise a tuple of tensors over -# towers will be passed in, and the factors will iterate over this and do the -# cov computations separately for each one, averaging the results together. 
-TOWER_STRATEGY = "concat" - - -def set_global_constants(init_covariances_at_zero=None, - zero_debias=None, - init_inverses_at_zero=None, - eigenvalue_decomposition_threshold=None, - eigenvalue_clipping_threshold=None, - max_num_outer_products_per_cov_row=None, - sub_sample_outer_products=None, - inputs_to_extract_patches_factor=None, - sub_sample_inputs=None, - tower_strategy=None): - """Sets various global constants used by the classes in this module.""" - global INIT_COVARIANCES_AT_ZERO - global ZERO_DEBIAS - global INIT_INVERSES_AT_ZERO - global EIGENVALUE_DECOMPOSITION_THRESHOLD - global EIGENVALUE_CLIPPING_THRESHOLD - global _MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW - global _SUB_SAMPLE_OUTER_PRODUCTS - global _INPUTS_TO_EXTRACT_PATCHES_FACTOR - global _SUB_SAMPLE_INPUTS - global TOWER_STRATEGY - - if init_covariances_at_zero is not None: - INIT_COVARIANCES_AT_ZERO = init_covariances_at_zero - if zero_debias is not None: - ZERO_DEBIAS = zero_debias - if init_inverses_at_zero is not None: - INIT_INVERSES_AT_ZERO = init_inverses_at_zero - if eigenvalue_decomposition_threshold is not None: - EIGENVALUE_DECOMPOSITION_THRESHOLD = eigenvalue_decomposition_threshold - if eigenvalue_clipping_threshold is not None: - EIGENVALUE_CLIPPING_THRESHOLD = eigenvalue_clipping_threshold - if max_num_outer_products_per_cov_row is not None: - _MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW = max_num_outer_products_per_cov_row - if sub_sample_outer_products is not None: - _SUB_SAMPLE_OUTER_PRODUCTS = sub_sample_outer_products - if inputs_to_extract_patches_factor is not None: - _INPUTS_TO_EXTRACT_PATCHES_FACTOR = inputs_to_extract_patches_factor - if sub_sample_inputs is not None: - _SUB_SAMPLE_INPUTS = sub_sample_inputs - if tower_strategy is not None: - TOWER_STRATEGY = tower_strategy - - -def inverse_initializer(shape, dtype, partition_info=None): # pylint: disable=unused-argument - if INIT_INVERSES_AT_ZERO: - return array_ops.zeros(shape, dtype=dtype) - return linalg_ops.eye(num_rows=shape[0], 
dtype=dtype) - - -def covariance_initializer(shape, dtype, partition_info=None): # pylint: disable=unused-argument - if INIT_COVARIANCES_AT_ZERO: - return array_ops.zeros(shape, dtype=dtype) - return linalg_ops.eye(num_rows=shape[0], dtype=dtype) - - -def diagonal_covariance_initializer(shape, dtype, partition_info=None): # pylint: disable=unused-argument - if INIT_COVARIANCES_AT_ZERO: - return array_ops.zeros(shape, dtype=dtype) - return array_ops.ones(shape, dtype=dtype) - - -@contextlib.contextmanager -def place_on_device(device): - if device is not None and len(device): - with tf_ops.device(device): - yield - else: - yield - - -def compute_cov(tensor, tensor_right=None, normalizer=None): - """Compute the empirical second moment of the rows of a 2D Tensor. - - This function is meant to be applied to random matrices for which the true row - mean is zero, so that the true second moment equals the true covariance. - - Args: - tensor: A 2D Tensor. - tensor_right: An optional 2D Tensor. If provided, this function computes - the matrix product tensor^T * tensor_right instead of tensor^T * tensor. - normalizer: optional scalar for the estimator (by default, the normalizer is - the number of rows of tensor). - - Returns: - A square 2D Tensor with as many rows/cols as the number of input columns. - """ - if normalizer is None: - normalizer = array_ops.shape(tensor)[0] - if tensor_right is None: - cov = ( - math_ops.matmul(tensor, tensor, transpose_a=True) / math_ops.cast( - normalizer, tensor.dtype)) - return (cov + array_ops.transpose(cov)) / math_ops.cast(2.0, cov.dtype) - else: - return (math_ops.matmul(tensor, tensor_right, transpose_a=True) / - math_ops.cast(normalizer, tensor.dtype)) - - -def append_homog(tensor): - """Appends a homogeneous coordinate to the last dimension of a Tensor. - - Args: - tensor: A Tensor. - - Returns: - A Tensor identical to the input but one larger in the last dimension. The - new entries are filled with ones. 
- """ - rank = len(tensor.shape.as_list()) - shape = array_ops.concat([array_ops.shape(tensor)[:-1], [1]], axis=0) - ones = array_ops.ones(shape, dtype=tensor.dtype) - return array_ops.concat([tensor, ones], axis=rank - 1) - - -def scope_string_from_params(params): - """Builds a variable scope string name from the given parameters. - - Supported parameters are: - * tensors - * booleans - * ints - * strings - * depth-1 tuples/lists of ints - * any depth tuples/lists of tensors - Other parameter types will throw an error. - - Args: - params: A parameter or list of parameters. - - Returns: - A string to use for the variable scope. - - Raises: - ValueError: if params includes an unsupported type. - """ - params = params if isinstance(params, (tuple, list)) else (params,) - - name_parts = [] - for param in params: - if param is None: - name_parts.append("None") - elif isinstance(param, (tuple, list)): - if all([isinstance(p, int) for p in param]): - name_parts.append("-".join([str(p) for p in param])) - else: - name_parts.append(scope_string_from_name(param)) - elif isinstance(param, (str, int, bool)): - name_parts.append(str(param)) - elif isinstance(param, (tf_ops.Tensor, variables.Variable)): - name_parts.append(scope_string_from_name(param)) - elif isinstance(param, utils.PartitionedTensor): - name_parts.append(scope_string_from_name(param.tensors)) - else: - raise ValueError("Encountered an unsupported param type {}".format( - type(param))) - return "_".join(name_parts) - - -def scope_string_from_name(tensor): - if isinstance(tensor, (tuple, list)): - return "__".join([scope_string_from_name(t) for t in tensor]) - # "gradients/add_4_grad/Reshape:0" -> "gradients_add_4_grad_Reshape" - return tensor.name.split(":")[0].replace("/", "_") - - -def scalar_or_tensor_to_string(val): - return repr(val) if np.isscalar(val) else scope_string_from_name(val) - - -def list_to_string(lst): - return "_".join(val if isinstance(val, six.string_types) - else 
scalar_or_tensor_to_string(val) for val in lst) - - -def graph_func_to_id(func): - """Returns a hashable object that represents func's computation.""" - # TODO(b/74201126): replace with Topohash of func's output - return func.func_id - - -def graph_func_to_string(func): - # TODO(b/74201126): replace with Topohash of func's output - return list_to_string(func.func_id) - - -def _subsample_for_cov_computation(array, name=None): - """Subsamples the first dimension of the array. - - `array`(A) is a tensor of shape `[batch_size, dim_2]`. Then the covariance - matrix(A^TA) is of shape `dim_2 ** 2`. Subsample only if the number of outer - products per row of the covariance matrix is greater than - `_MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW`. - - Args: - array: Tensor, of shape `[batch_size, dim_2]`. - name: `string`, Default(None) - - Returns: - A tensor of shape `[max_samples, dim_2]`. - - Raises: - ValueError: If array's is not matrix-shaped. - ValueError: If array's batch_size cannot be inferred. - - """ - with tf_ops.name_scope(name, "subsample", [array]): - array = tf_ops.convert_to_tensor(array) - if len(array.shape) != 2: - raise ValueError("Input param array must be a matrix.") - - batch_size = array.shape.as_list()[0] - if batch_size is None: - raise ValueError("Unable to get batch_size from input param array.") - - num_cov_rows = array.shape.as_list()[-1] - max_batch_size = int(_MAX_NUM_OUTER_PRODUCTS_PER_COV_ROW * num_cov_rows) - if batch_size <= max_batch_size: - return array - - return _random_tensor_gather(array, max_batch_size) - - -def _random_tensor_gather(array, max_size): - """Generates a random set of indices and gathers the value at the indices. - - Args: - array: Tensor, of shape `[batch_size, dim_2]`. - max_size: int, Number of indices to sample. - - Returns: - A tensor of shape `[max_size, ...]`. 
- """ - batch_size = array.shape.as_list()[0] - indices = random_ops.random_shuffle(math_ops.range(0, batch_size))[:max_size] - return array_ops.gather(array, indices) - - -@six.add_metaclass(abc.ABCMeta) -class FisherFactor(object): - """Base class for objects modeling factors of approximate Fisher blocks. - - A FisherFactor represents part of an approximate Fisher Information matrix. - For example, one approximation to the Fisher uses the Kronecker product of two - FisherFactors A and B, F = kron(A, B). FisherFactors are composed with - FisherBlocks to construct a block-diagonal approximation to the full Fisher. - - FisherFactors are backed by a single, non-trainable variable that is updated - by running FisherFactor.make_covariance_update_op(). The shape and type of - this variable is implementation specific. - - Note that for blocks that aren't based on approximations, a 'factor' can - be the entire block itself, as is the case for the diagonal and full - representations. - """ - - def __init__(self): - self._cov = None - - @abc.abstractproperty - def _var_scope(self): - """Variable scope for this FisherFactor instance. - - Returns: - string that unique identifies this FisherFactor instance. - """ - pass - - @property - def name(self): - return self._var_scope - - @abc.abstractproperty - def _cov_shape(self): - """The shape of the variable backing this FisherFactor.""" - pass - - @abc.abstractproperty - def _num_sources(self): - """The number of things to sum over when updating covariance variable. - - The default make_covariance_update_op function will call _compute_new_cov - with indices ranging from 0 to _num_sources-1. The typical situation is - where the factor wants to sum the statistics it computes over multiple - backpropped "gradients" (typically passed in via "tensors" or - "outputs_grads" arguments). 
- """ - pass - - @abc.abstractproperty - def _num_towers(self): - pass - - @abc.abstractproperty - def _dtype(self): - """dtype for variable backing this factor.""" - pass - - @property - def _cov_initializer(self): - """Function for initializing covariance variable.""" - return covariance_initializer - - def instantiate_cov_variables(self): - """Makes the internal cov variable(s).""" - assert self._cov is None - with variable_scope.variable_scope(self._var_scope): - self._cov = variable_scope.get_variable( - "cov", - initializer=self._cov_initializer, - shape=self._cov_shape, - trainable=False, - dtype=self._dtype) - - @abc.abstractmethod - def _compute_new_cov(self, source, tower): - """Computes minibatch-estimated covariance for a single source. - - Args: - source: int in [0, self._num_sources). Which source to use when computing - the cov update. - tower: int in [0, self._num_towers). Which tower to use when computing - the cov update. - - Returns: - Tensor of same shape as self.get_cov(). - """ - pass - - def make_covariance_update_op(self, ema_decay): - """Constructs and returns the covariance update Op. - - Args: - ema_decay: The exponential moving average decay (float or Tensor). - Returns: - An Op for updating the covariance Variable referenced by _cov. - """ - new_cov_contribs = [] - for source in range(self._num_sources): - for tower in range(self._num_towers): - device = (self._get_data_device(tower) - if TOWER_STRATEGY == "separate" else None) - with place_on_device(device): - new_cov_contribs.append(self._compute_new_cov(source, tower)) - - new_cov = math_ops.add_n(new_cov_contribs) / float(self._num_towers) - - # Compute average of 'new_cov' across all TPU cores. On a TPU, each - # instance of 'new_cov' will be based on a different minibatch. This ensures - # that by the end of assign_moving_average(), all TPU cores see the same - # value for self._cov. 
- # - # Other implementations of make_covariance_update_op() that accumulate - # statistics in other variables should mimic this behavior. - if utils.on_tpu(): - new_cov = utils.cross_replica_mean(new_cov) - - return moving_averages.assign_moving_average( - self._cov, new_cov, ema_decay, zero_debias=ZERO_DEBIAS) - - @abc.abstractmethod - def _get_data_device(self, tower): - pass - - @abc.abstractmethod - def instantiate_inv_variables(self): - """Makes the internal "inverse" variable(s).""" - pass - - @abc.abstractmethod - def make_inverse_update_ops(self): - """Create and return update ops corresponding to registered computations.""" - pass - - def get_cov(self): - return self._cov - - @abc.abstractmethod - def get_cov_as_linear_operator(self): - pass - - @abc.abstractmethod - def register_matpower(self, exp, damping_func): - pass - - @abc.abstractmethod - def register_cholesky(self, damping_func): - pass - - @abc.abstractmethod - def register_cholesky_inverse(self, damping_func): - pass - - @abc.abstractmethod - def get_matpower(self, exp, damping_func): - pass - - @abc.abstractmethod - def get_cholesky(self, damping_func): - pass - - @abc.abstractmethod - def get_cholesky_inverse(self, damping_func): - pass - - -class DenseSquareMatrixFactor(FisherFactor): - """Base class for FisherFactors that are stored as dense square matrices. - - This class explicitly calculates and stores inverses of their `cov` matrices, - which must be square dense matrices. - - Subclasses must implement the _compute_new_cov method, and the _var_scope and - _cov_shape properties. - """ - - # TODO(b/69108481): This class (and its subclasses) should be refactored to - # serve the matrix quantities it computes as both (potentially stale) - # variables, updated by the inverse update ops, and fresh values stored in - # tensors that recomputed once every session.run() call. Currently matpower - # and damp_inverse have the former behavior, while eigendecomposition has - # the latter. 
- - def __init__(self): - self._matpower_by_exp_and_damping = {} # { (float, hashable): variable } - self._matpower_registrations = set() # { (float, hashable) } - self._eigendecomp = None - self._damping_funcs_by_id = {} # {hashable: lambda} - - self._cholesky_registrations = set() # { hashable } - self._cholesky_inverse_registrations = set() # { hashable } - - self._cholesky_by_damping = {} # { hashable: variable } - self._cholesky_inverse_by_damping = {} # { hashable: variable } - - super(DenseSquareMatrixFactor, self).__init__() - - def get_cov_as_linear_operator(self): - assert self.get_cov().shape.ndims == 2 - return lo.LinearOperatorFullMatrix(self.get_cov(), - is_self_adjoint=True, - is_square=True) - - def _register_damping(self, damping_func): - damping_id = graph_func_to_id(damping_func) - if damping_id not in self._damping_funcs_by_id: - self._damping_funcs_by_id[damping_id] = damping_func - return damping_id - - def register_inverse(self, damping_func): - # Just for backwards compatibility of some old code and tests - self.register_matpower(-1, damping_func) - - def register_matpower(self, exp, damping_func): - """Registers a matrix power to be maintained and served on demand. - - This creates a variable and signals make_inverse_update_ops to make the - corresponding update op. The variable can be read via the method - get_matpower. - - Args: - exp: float. The exponent to use in the matrix power. - damping_func: A function that computes a 0-D Tensor or a float which will - be the damping value used. i.e. damping = damping_func(). - """ - if exp == 1.0: - return - - damping_id = self._register_damping(damping_func) - - if (exp, damping_id) not in self._matpower_registrations: - self._matpower_registrations.add((exp, damping_id)) - - def register_cholesky(self, damping_func): - """Registers a Cholesky factor to be maintained and served on demand. - - This creates a variable and signals make_inverse_update_ops to make the - corresponding update op. 
The variable can be read via the method - get_cholesky. - - Args: - damping_func: A function that computes a 0-D Tensor or a float which will - be the damping value used. i.e. damping = damping_func(). - """ - damping_id = self._register_damping(damping_func) - - if damping_id not in self._cholesky_registrations: - self._cholesky_registrations.add(damping_id) - - def register_cholesky_inverse(self, damping_func): - """Registers an inverse Cholesky factor to be maintained/served on demand. - - This creates a variable and signals make_inverse_update_ops to make the - corresponding update op. The variable can be read via the method - get_cholesky_inverse. - - Args: - damping_func: A function that computes a 0-D Tensor or a float which will - be the damping value used. i.e. damping = damping_func(). - """ - damping_id = self._register_damping(damping_func) - - if damping_id not in self._cholesky_inverse_registrations: - self._cholesky_inverse_registrations.add(damping_id) - - def instantiate_inv_variables(self): - """Makes the internal "inverse" variable(s).""" - - for (exp, damping_id) in self._matpower_registrations: - exp_string = scalar_or_tensor_to_string(exp) - damping_func = self._damping_funcs_by_id[damping_id] - damping_string = graph_func_to_string(damping_func) - with variable_scope.variable_scope(self._var_scope): - matpower = variable_scope.get_variable( - "matpower_exp{}_damp{}".format(exp_string, damping_string), - initializer=inverse_initializer, - shape=self._cov_shape, - trainable=False, - dtype=self._dtype) - assert (exp, damping_id) not in self._matpower_by_exp_and_damping - self._matpower_by_exp_and_damping[(exp, damping_id)] = matpower - - for damping_id in self._cholesky_registrations: - damping_func = self._damping_funcs_by_id[damping_id] - damping_string = graph_func_to_string(damping_func) - with variable_scope.variable_scope(self._var_scope): - chol = variable_scope.get_variable( - "cholesky_damp{}".format(damping_string), - 
initializer=inverse_initializer, - shape=self._cov_shape, - trainable=False, - dtype=self._dtype) - assert damping_id not in self._cholesky_by_damping - self._cholesky_by_damping[damping_id] = chol - - for damping_id in self._cholesky_inverse_registrations: - damping_func = self._damping_funcs_by_id[damping_id] - damping_string = graph_func_to_string(damping_func) - with variable_scope.variable_scope(self._var_scope): - cholinv = variable_scope.get_variable( - "cholesky_inverse_damp{}".format(damping_string), - initializer=inverse_initializer, - shape=self._cov_shape, - trainable=False, - dtype=self._dtype) - assert damping_id not in self._cholesky_inverse_by_damping - self._cholesky_inverse_by_damping[damping_id] = cholinv - - def make_inverse_update_ops(self): - """Create and return update ops corresponding to registered computations.""" - ops = [] - - num_inverses = sum(1 for (exp, _) in self._matpower_by_exp_and_damping - if exp == -1) - - num_other_matpower = len(self._matpower_by_exp_and_damping) - num_inverses - - other_matrix_power_registered = num_other_matpower >= 1 - - use_eig = ( - self._eigendecomp or other_matrix_power_registered or - num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD) - - # We precompute these so we don't need to evaluate them multiple times (for - # each matrix power that uses them) - damping_value_by_id = {damping_id: math_ops.cast( - self._damping_funcs_by_id[damping_id](), self._dtype) - for damping_id in self._damping_funcs_by_id} - - if use_eig: - eigenvalues, eigenvectors = self.get_eigendecomp() # pylint: disable=unpacking-non-sequence - - for (exp, damping_id), matpower in ( - self._matpower_by_exp_and_damping.items()): - damping = damping_value_by_id[damping_id] - ops.append( - matpower.assign( - math_ops.matmul(eigenvectors * - (eigenvalues + damping)**exp, - array_ops.transpose(eigenvectors)))) - # These ops share computation and should be run on a single device. 
- ops = [control_flow_ops.group(*ops)] - else: - for (exp, damping_id), matpower in ( - self._matpower_by_exp_and_damping.items()): - assert exp == -1 - damping = damping_value_by_id[damping_id] - ops.append(matpower.assign(utils.posdef_inv(self.get_cov(), damping))) - - # TODO(b/77902055): If inverses are being computed with Cholesky's - # we can share the work. Instead this code currently just computes the - # Cholesky a second time. It does at least share work between requests for - # Cholesky's and Cholesky inverses with the same damping id. - for damping_id, cholesky_inv in self._cholesky_inverse_by_damping.items(): - cholesky_ops = [] - - damping = damping_value_by_id[damping_id] - cholesky_value = utils.cholesky(self.get_cov(), damping) - - if damping_id in self._cholesky_by_damping: - cholesky = self._cholesky_by_damping[damping_id] - cholesky_ops.append(cholesky.assign(cholesky_value)) - - identity = linalg_ops.eye(cholesky_value.shape.as_list()[0], - dtype=cholesky_value.dtype) - cholesky_inv_value = linalg_ops.matrix_triangular_solve(cholesky_value, - identity) - cholesky_ops.append(cholesky_inv.assign(cholesky_inv_value)) - - ops.append(control_flow_ops.group(*cholesky_ops)) - - for damping_id, cholesky in self._cholesky_by_damping.items(): - if damping_id not in self._cholesky_inverse_by_damping: - damping = damping_value_by_id[damping_id] - cholesky_value = utils.cholesky(self.get_cov(), damping) - ops.append(cholesky.assign(cholesky_value)) - - self._eigendecomp = False - return ops - - def get_inverse(self, damping_func): - # Just for backwards compatibility of some old code and tests - return self.get_matpower(-1, damping_func) - - def get_matpower(self, exp, damping_func): - # Note that this function returns a variable which gets updated by the - # inverse ops. It may be stale / inconsistent with the latest value of - # get_cov(). 
- if exp != 1: - damping_id = graph_func_to_id(damping_func) - matpower = self._matpower_by_exp_and_damping[(exp, damping_id)] - else: - matpower = self.get_cov() - identity = linalg_ops.eye(matpower.shape.as_list()[0], - dtype=matpower.dtype) - matpower += math_ops.cast(damping_func(), dtype=matpower.dtype)*identity - - assert matpower.shape.ndims == 2 - return lo.LinearOperatorFullMatrix(matpower, - is_non_singular=True, - is_self_adjoint=True, - is_positive_definite=True, - is_square=True) - - def get_cholesky(self, damping_func): - # Note that this function returns a variable which gets updated by the - # inverse ops. It may be stale / inconsistent with the latest value of - # get_cov(). - damping_id = graph_func_to_id(damping_func) - cholesky = self._cholesky_by_damping[damping_id] - assert cholesky.shape.ndims == 2 - return lo.LinearOperatorFullMatrix(cholesky, - is_non_singular=True, - is_square=True) - - def get_cholesky_inverse(self, damping_func): - # Note that this function returns a variable which gets updated by the - # inverse ops. It may be stale / inconsistent with the latest value of - # get_cov(). - damping_id = graph_func_to_id(damping_func) - cholesky_inv = self._cholesky_inverse_by_damping[damping_id] - assert cholesky_inv.shape.ndims == 2 - return lo.LinearOperatorFullMatrix(cholesky_inv, - is_non_singular=True, - is_square=True) - - def get_eigendecomp(self): - """Creates or retrieves eigendecomposition of self._cov.""" - # Unlike get_matpower this doesn't retrieve a stored variable, but instead - # always computes a fresh version from the current value of get_cov(). 
- if not self._eigendecomp: - eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig(self.get_cov()) - - # The matrix self._cov is positive semidefinite by construction, but the - # numerical eigenvalues could be negative due to numerical errors, so here - # we clip them to be at least FLAGS.eigenvalue_clipping_threshold - clipped_eigenvalues = math_ops.maximum(eigenvalues, - EIGENVALUE_CLIPPING_THRESHOLD) - self._eigendecomp = (clipped_eigenvalues, eigenvectors) - - return self._eigendecomp - - -class FullFactor(DenseSquareMatrixFactor): - """FisherFactor for a full matrix representation of the Fisher of a parameter. - - Note that this uses the naive "square the sum estimator", and so is applicable - to any type of parameter in principle, but has very high variance. - """ - - def __init__(self, - params_grads, - batch_size): - self._batch_size = batch_size - self._params_grads = tuple(utils.ensure_sequence(params_grad) - for params_grad in params_grads) - super(FullFactor, self).__init__() - - @property - def _var_scope(self): - return "ff_full_" + scope_string_from_params( - [self._params_grads, self._batch_size]) - - @property - def _cov_shape(self): - size = sum(param_grad.shape.num_elements() - for param_grad in self._params_grads[0]) - return (size, size) - - @property - def _num_sources(self): - return len(self._params_grads) - - @property - def _num_towers(self): - return 1 - - @property - def _dtype(self): - return self._params_grads[0][0].dtype - - def _compute_new_cov(self, source, tower): - assert tower == 0 - - # This will be a very basic rank 1 estimate - params_grads_flat = utils.tensors_to_column(self._params_grads[source]) - return ((params_grads_flat * array_ops.transpose( - params_grads_flat)) / math_ops.cast(self._batch_size, - params_grads_flat.dtype)) - - def _get_data_device(self, tower): - return None - - -class DiagonalFactor(FisherFactor): - """A base class for FisherFactors that use diagonal approximations. 
- - A DiagonalFactor's covariance variable can be of any shape, but must contain - exactly one entry per parameter. - """ - - def __init__(self): - super(DiagonalFactor, self).__init__() - - def get_cov_as_linear_operator(self): - assert self._matrix_diagonal.shape.ndims == 1 - return lo.LinearOperatorDiag(self._matrix_diagonal, - is_self_adjoint=True, - is_square=True) - - @property - def _cov_initializer(self): - return diagonal_covariance_initializer - - @property - def _matrix_diagonal(self): - return array_ops.reshape(self.get_cov(), [-1]) - - def make_inverse_update_ops(self): - return [] - - def instantiate_inv_variables(self): - pass - - def register_matpower(self, exp, damping_func): - pass - - def register_cholesky(self, damping_func): - pass - - def register_cholesky_inverse(self, damping_func): - pass - - def get_matpower(self, exp, damping_func): - matpower_diagonal = (self._matrix_diagonal - + math_ops.cast(damping_func(), self._dtype))**exp - return lo.LinearOperatorDiag(matpower_diagonal, - is_non_singular=True, - is_self_adjoint=True, - is_positive_definite=True, - is_square=True) - - def get_cholesky(self, damping_func): - return self.get_matpower(0.5, damping_func) - - def get_cholesky_inverse(self, damping_func): - return self.get_matpower(-0.5, damping_func) - - -class NaiveDiagonalFactor(DiagonalFactor): - """FisherFactor for a diagonal approximation of any type of param's Fisher. - - Note that this uses the naive "square the sum estimator", and so is applicable - to any type of parameter in principle, but has very high variance. - """ - - def __init__(self, - params_grads, - batch_size): - """Initializes NaiveDiagonalFactor instance. - - Args: - params_grads: Sequence of Tensors, each with same shape as parameters this - FisherFactor corresponds to. For example, the gradient of the loss with - respect to parameters. - batch_size: int or 0-D Tensor. 
Size - """ - self._params_grads = tuple(utils.ensure_sequence(params_grad) - for params_grad in params_grads) - self._batch_size = batch_size - super(NaiveDiagonalFactor, self).__init__() - - @property - def _var_scope(self): - return "ff_naivediag_" + scope_string_from_params( - [self._params_grads, self._batch_size]) - - @property - def _cov_shape(self): - size = sum(param_grad.shape.num_elements() - for param_grad in self._params_grads[0]) - return [size, 1] - - @property - def _num_sources(self): - return len(self._params_grads) - - @property - def _num_towers(self): - return 1 - - @property - def _dtype(self): - return self._params_grads[0][0].dtype - - def _compute_new_cov(self, source, tower): - assert tower == 0 - - params_grads_flat = utils.tensors_to_column(self._params_grads[source]) - return (math_ops.square(params_grads_flat) / math_ops.cast( - self._batch_size, params_grads_flat.dtype)) - - def _get_data_device(self, tower): - return None - - -class EmbeddingInputKroneckerFactor(DiagonalFactor): - r"""FisherFactor for input to an embedding layer. - - Given input_ids = [batch_size, input_size] representing indices into an - [vocab_size, embedding_size] embedding matrix, approximate input covariance by - a diagonal matrix, - - Cov(input_ids, input_ids) = - (1/batch_size) sum_{i} diag(n_hot(input[i]) ** 2). - - where n_hot() constructs an n-hot binary vector and diag() constructs a - diagonal matrix of size [vocab_size, vocab_size]. - """ - - def __init__(self, input_ids, vocab_size, dtype=None): - """Instantiate EmbeddingInputKroneckerFactor. - - Args: - input_ids: List of Tensors of shape [batch_size, input_size] and dtype - int32. Indices into embedding matrix. List index is tower. - vocab_size: int or 0-D Tensor. Maximum value for entries in 'input_ids'. - dtype: dtype for covariance statistics. Must be a floating point type. - Defaults to float32. 
- """ - self._input_ids = input_ids - self._vocab_size = vocab_size - self._cov_dtype = dtype or dtypes.float32 - - super(EmbeddingInputKroneckerFactor, self).__init__() - - @property - def _var_scope(self): - return "ff_diag_embedding_" + scope_string_from_params(self._input_ids) - - @property - def _cov_shape(self): - return [self._vocab_size] - - @property - def _num_sources(self): - return 1 - - @property - def _num_towers(self): - return len(self._input_ids) - - @property - def _dtype(self): - return self._cov_dtype - - def _compute_new_cov(self, source, tower): - assert source == 0 - - input_ids = self._input_ids[tower] - - if len(input_ids.shape) > 2: - raise ValueError( - "Input to embeddings must have rank <= 2. Found rank %d." % len( - input_ids.shape)) - - batch_size = array_ops.shape(input_ids)[0] - - # Transform indices into one-hot vectors. - # - # TODO(b/72714822): There must be a faster way to construct the diagonal - # covariance matrix! This operation is O(batch_size * vocab_size), where - # it should be O(batch_size * input_size). - flat_input_ids = array_ops.reshape(input_ids, [-1]) - one_hots = array_ops.one_hot(flat_input_ids, - self._vocab_size) # [?, vocab_size] - - # Take average across examples. Note that, because all entries have - # magnitude zero or one, there's no need to square the entries. - # - # TODO(b/72714822): Support for SparseTensor, other kinds of aggregation - # within an example such as average. - # - # TODO(b/72714822): Support for partitioned embeddings. - new_cov = math_ops.reduce_sum(one_hots, axis=0) # [vocab_size] - new_cov /= math_ops.cast(batch_size, new_cov.dtype) - - return new_cov - - def _get_data_device(self, tower): - return self._input_ids[tower].device - - -class FullyConnectedDiagonalFactor(DiagonalFactor): - r"""FisherFactor for a diagonal approx of a fully-connected layer's Fisher. 
- - Given in = [batch_size, input_size] and out_grad = [batch_size, output_size], - approximates the covariance as, - - Cov(in, out) = (1/batch_size) sum_{i} outer(in[i], out_grad[i]) ** 2.0 - - where the square is taken element-wise. - """ - - def __init__(self, - inputs, - outputs_grads, - has_bias=False): - """Instantiate FullyConnectedDiagonalFactor. - - Args: - inputs: List of Tensors of shape [batch_size, input_size]. Inputs to this - layer. List index is towers. - outputs_grads: List of Tensors, each of shape [batch_size, output_size], - which are the gradients of the loss with respect to the layer's - outputs. First index is source, second is tower. - - has_bias: bool. If True, append '1' to each input. - """ - self._inputs = inputs - self._has_bias = has_bias - self._outputs_grads = outputs_grads - self._squared_inputs = None - - super(FullyConnectedDiagonalFactor, self).__init__() - - @property - def _var_scope(self): - return "ff_diagfc_" + scope_string_from_params( - tuple(self._inputs) + tuple(nest.flatten(self._outputs_grads))) - - @property - def _cov_shape(self): - input_size = self._inputs[0].shape[1] + self._has_bias - output_size = self._outputs_grads[0][0].shape[1] - return [input_size, output_size] - - @property - def _num_sources(self): - return len(self._outputs_grads) - - @property - def _num_towers(self): - return len(self._inputs) - - @property - def _dtype(self): - return self._outputs_grads[0][0].dtype - - def make_covariance_update_op(self, ema_decay): - - self._squared_inputs = [] - for tower in range(self._num_towers): - inputs = self._inputs[tower] - - with place_on_device(self._get_data_device(tower)): - if self._has_bias: - inputs = append_homog(inputs) - self._squared_inputs.append(math_ops.square(inputs)) - - return super(FullyConnectedDiagonalFactor, self).make_covariance_update_op( - ema_decay) - - def _compute_new_cov(self, source, tower): - batch_size = array_ops.shape(self._squared_inputs[tower])[0] - outputs_grad = 
self._outputs_grads[source][tower] - - # The well-known special formula that uses the fact that the entry-wise - # square of an outer product is the outer-product of the entry-wise squares. - # The gradient is the outer product of the input and the output gradients, - # so we just square both and then take their outer-product. - new_cov = math_ops.matmul( - self._squared_inputs[tower], - math_ops.square(outputs_grad), - transpose_a=True) - new_cov /= math_ops.cast(batch_size, new_cov.dtype) - return new_cov - - def _get_data_device(self, tower): - return self._inputs[tower].device - - -class ConvDiagonalFactor(DiagonalFactor): - """FisherFactor for a diagonal approx of a convolutional layer's Fisher.""" - - def __init__(self, - inputs, - outputs_grads, - filter_shape, - strides, - padding, - data_format=None, - dilations=None, - has_bias=False): - """Creates a ConvDiagonalFactor object. - - Args: - inputs: List of Tensors of shape [batch_size, height, width, in_channels]. - Input activations to this layer. List index is towers. - outputs_grads: List of Tensors, each of shape [batch_size, - height, width, out_channels], which are the gradients of the loss - with respect to the layer's outputs. First index is source, second - index is tower. - filter_shape: Tuple of 4 ints: (kernel_height, kernel_width, in_channels, - out_channels). Represents shape of kernel used in this layer. - strides: The stride size in this layer (1-D Tensor of length 4). - padding: The padding in this layer (1-D of Tensor length 4). - data_format: None or str. Format of conv2d inputs. - dilations: None or tuple of 4 ints. - has_bias: Python bool. If True, the layer is assumed to have a bias - parameter in addition to its filter parameter. - - Raises: - ValueError: If inputs, output_grads, and filter_shape do not agree on - in_channels or out_channels. - ValueError: If strides, dilations are not length-4 lists of ints. - ValueError: If data_format does not put channel last. 
- """ - if not utils.is_data_format_channel_last(data_format): - raise ValueError("Channel must be last.") - if any(input_.shape.ndims != 4 for input_ in inputs): - raise ValueError("inputs must be a list of 4-D Tensors.") - if any(input_.shape.as_list()[-1] != filter_shape[-2] for input_ in inputs): - raise ValueError("inputs and filter_shape must agree on in_channels.") - for i, outputs_grad in enumerate(outputs_grads): - if any(output_grad.shape.ndims != 4 for output_grad in outputs_grad): - raise ValueError("outputs[%d] must be 4-D Tensor." % i) - if any(output_grad.shape.as_list()[-1] != filter_shape[-1] - for output_grad in outputs_grad): - raise ValueError( - "outputs[%d] and filter_shape must agree on out_channels." % i) - if len(strides) != 4: - raise ValueError("strides must be length-4 list of ints.") - if dilations is not None and len(dilations) != 4: - raise ValueError("dilations must be length-4 list of ints.") - - self._inputs = inputs - self._outputs_grads = outputs_grads - self._filter_shape = filter_shape - self._strides = strides - self._padding = padding - self._data_format = data_format - self._dilations = dilations - self._has_bias = has_bias - self._patches = None - - super(ConvDiagonalFactor, self).__init__() - - @property - def _var_scope(self): - return "ff_convdiag_" + scope_string_from_params( - tuple(self._inputs) + tuple(nest.flatten(self._outputs_grads))) - - @property - def _cov_shape(self): - filter_height, filter_width, in_channels, out_channels = self._filter_shape - return [ - filter_height * filter_width * in_channels + self._has_bias, - out_channels - ] - - @property - def _num_sources(self): - return len(self._outputs_grads) - - @property - def _num_towers(self): - return len(self._inputs) - - @property - def _dtype(self): - return self._inputs[0].dtype - - def make_covariance_update_op(self, ema_decay): - filter_height, filter_width, _, _ = self._filter_shape - - # TODO(b/64144716): there is potential here for a big savings 
in terms - # of memory use. - if self._dilations is None: - rates = (1, 1, 1, 1) - else: - rates = tuple(self._dilations) - - self._patches = [] - for tower in range(self._num_towers): - with place_on_device(self._get_data_device(tower)): - patches = array_ops.extract_image_patches( - self._inputs[tower], - ksizes=[1, filter_height, filter_width, 1], - strides=self._strides, - rates=rates, - padding=self._padding) - - if self._has_bias: - patches = append_homog(patches) - - self._patches.append(patches) - - return super(ConvDiagonalFactor, self).make_covariance_update_op(ema_decay) - - def _compute_new_cov(self, source, tower): - patches = self._patches[tower] - batch_size = array_ops.shape(patches)[0] - outputs_grad = self._outputs_grads[source][tower] - - new_cov = self._convdiag_sum_of_squares(patches, outputs_grad) - new_cov /= math_ops.cast(batch_size, new_cov.dtype) - - return new_cov - - def _convdiag_sum_of_squares(self, patches, outputs_grad): - # This computes the sum of the squares of the per-training-case "gradients". - # It does this simply by computing a giant tensor containing all of these, - # doing an entry-wise square, and them summing along the batch dimension. - case_wise_gradients = special_math_ops.einsum("bijk,bijl->bkl", patches, - outputs_grad) - return math_ops.reduce_sum(math_ops.square(case_wise_gradients), axis=0) - - def _get_data_device(self, tower): - return self._inputs[tower].device - - -class FullyConnectedKroneckerFactor(DenseSquareMatrixFactor): - """Kronecker factor for the input or output side of a fully-connected layer. - """ - - def __init__(self, - tensors, - has_bias=False): - """Instantiate FullyConnectedKroneckerFactor. - - Args: - tensors: List of list of Tensors, each of shape [batch_size, n]. The - Tensors are typically either a layer's inputs or its output's gradients. - The first list index is source, the second is tower. - has_bias: bool. If True, append '1' to each row. 
- """ - # The tensor argument is either a tensor of input activations or a tensor of - # output pre-activation gradients. - self._has_bias = has_bias - self._tensors = tensors - super(FullyConnectedKroneckerFactor, self).__init__() - - @property - def _var_scope(self): - return "ff_fckron_" + scope_string_from_params( - tuple(nest.flatten(self._tensors)) + (self._has_bias,)) - - @property - def _cov_shape(self): - size = self._tensors[0][0].shape[1] + self._has_bias - return [size, size] - - @property - def _num_sources(self): - return len(self._tensors) - - @property - def _num_towers(self): - return len(self._tensors[0]) - - @property - def _dtype(self): - return self._tensors[0][0].dtype - - def _compute_new_cov(self, source, tower): - tensor = self._tensors[source][tower] - if self._has_bias: - tensor = append_homog(tensor) - return compute_cov(tensor) - - def _get_data_device(self, tower): - return self._tensors[0][tower].device - - -class ConvInputKroneckerFactor(DenseSquareMatrixFactor): - r"""Kronecker factor for the input side of a convolutional layer. - - Estimates E[ a a^T ] where a is the inputs to a convolutional layer given - example x. Expectation is taken over all examples and locations. - - Equivalent to Omega in https://arxiv.org/abs/1602.01407 for details. See - Section 3.1 Estimating the factors. - """ - - def __init__(self, - inputs, - filter_shape, - padding, - strides=None, - dilation_rate=None, - data_format=None, - extract_patches_fn=None, - has_bias=False, - sub_sample_inputs=None, - sub_sample_patches=None): - """Initializes ConvInputKroneckerFactor. - - Args: - inputs: List of Tensors of shape [batch_size, ..spatial_input_size.., - in_channels]. Inputs to layer. List index is tower. - filter_shape: List of ints. Contains [..spatial_filter_size.., - in_channels, out_channels]. Shape of convolution kernel. - padding: str. Padding method for layer. "SAME" or "VALID". - strides: List of ints or None. Contains [..spatial_filter_strides..] 
if - 'extract_patches_fn' is compatible with tf.nn.convolution(), else - [1, ..spatial_filter_strides, 1]. - dilation_rate: List of ints or None. Rate for dilation along each spatial - dimension if 'extract_patches_fn' is compatible with - tf.nn.convolution(), else [1, ..spatial_dilation_rates.., 1]. - data_format: str or None. Format of input data. - extract_patches_fn: str or None. Name of function that extracts image - patches. One of "extract_convolution_patches", "extract_image_patches", - "extract_pointwise_conv2d_patches". - has_bias: bool. If True, append 1 to in_channel. - sub_sample_inputs: `bool`. If True, then subsample the inputs from which - the image patches are extracted. (Default: None) - sub_sample_patches: `bool`, If `True` then subsample the extracted - patches.(Default: None) - """ - self._inputs = inputs - self._filter_shape = filter_shape - self._strides = strides - self._padding = padding - self._dilation_rate = dilation_rate - self._data_format = data_format - self._extract_patches_fn = extract_patches_fn - self._has_bias = has_bias - if sub_sample_inputs is None: - self._sub_sample_inputs = _SUB_SAMPLE_INPUTS - else: - self._sub_sample_inputs = sub_sample_inputs - - if sub_sample_patches is None: - self._sub_sample_patches = _SUB_SAMPLE_OUTER_PRODUCTS - else: - self._sub_sample_patches = sub_sample_patches - super(ConvInputKroneckerFactor, self).__init__() - - @property - def _var_scope(self): - return "ff_convinkron_" + scope_string_from_params( - tuple(self._inputs) + - tuple((self._filter_shape, self._strides, self._padding, - self._dilation_rate, self._data_format, self._has_bias))) - - @property - def _cov_shape(self): - spatial_filter_shape = self._filter_shape[0:-2] - in_channels = self._filter_shape[-2] - size = np.prod(spatial_filter_shape) * in_channels + self._has_bias - return [size, size] - - @property - def _num_sources(self): - return 1 - - @property - def _num_towers(self): - return len(self._inputs) - - @property - def 
_dtype(self): - return self._inputs[0].dtype - - def _compute_new_cov(self, source, tower): - assert source == 0 - - inputs = self._inputs[tower] - if self._sub_sample_inputs: - batch_size = inputs.shape.as_list()[0] - max_size = int(batch_size * _INPUTS_TO_EXTRACT_PATCHES_FACTOR) - inputs = _random_tensor_gather(inputs, max_size) - - # TODO(b/64144716): there is potential here for a big savings in terms of - # memory use. - if self._extract_patches_fn in [None, "extract_convolution_patches"]: - patches = utils.extract_convolution_patches( - inputs, - self._filter_shape, - padding=self._padding, - strides=self._strides, - dilation_rate=self._dilation_rate, - data_format=self._data_format) - - elif self._extract_patches_fn == "extract_image_patches": - assert inputs.shape.ndims == 4 - assert len(self._filter_shape) == 4 - assert len(self._strides) == 4, self._strides - if self._dilation_rate is None: - rates = [1, 1, 1, 1] - else: - rates = self._dilation_rate - assert len(rates) == 4 - assert rates[0] == rates[-1] == 1 - patches = array_ops.extract_image_patches( - inputs, - ksizes=[1] + list(self._filter_shape[0:-2]) + [1], - strides=self._strides, - rates=rates, - padding=self._padding) - - elif self._extract_patches_fn == "extract_pointwise_conv2d_patches": - assert self._strides in [None, [1, 1, 1, 1], (1, 1, 1, 1)] - assert self._filter_shape[0] == self._filter_shape[1] == 1 - patches = utils.extract_pointwise_conv2d_patches( - inputs, self._filter_shape, data_format=None) - - else: - raise NotImplementedError(self._extract_patches_fn) - - flatten_size = np.prod(self._filter_shape[0:-1]) - # patches_flat below is the matrix [[A_l]] from the KFC paper (tilde - # omitted over A for clarity). It has shape M|T| x J|Delta| (eq. 14), - # where M = minibatch size, |T| = number of spatial locations, - # |Delta| = number of spatial offsets, and J = number of input maps - # for convolutional layer l. 
- patches_flat = array_ops.reshape(patches, [-1, flatten_size]) - - # We append a homogenous coordinate to patches_flat if the layer has - # bias parameters. This gives us [[A_l]]_H from the paper. - if self._sub_sample_patches: - patches_flat = _subsample_for_cov_computation(patches_flat) - - if self._has_bias: - patches_flat = append_homog(patches_flat) - # We call compute_cov without passing in a normalizer. compute_cov uses - # the first dimension of patches_flat i.e. M|T| as the normalizer by - # default. Hence we end up computing 1/M|T| * [[A_l]]^T [[A_l]], with - # shape J|Delta| x J|Delta|. This is related to hat{Omega}_l from - # the paper but has a different scale here for consistency with - # ConvOutputKroneckerFactor. - # (Tilde omitted over A for clarity.) - return compute_cov(patches_flat) - - def _get_data_device(self, tower): - return self._inputs[tower].device - - -class ConvOutputKroneckerFactor(DenseSquareMatrixFactor): - r"""Kronecker factor for the output side of a convolutional layer. - - Estimates E[ ds ds^T ] where s is the preactivations of a convolutional layer - given example x and ds = (d / d s) log(p(y|x, w)). Expectation is taken over - all examples and locations. - - Equivalent to Gamma in https://arxiv.org/abs/1602.01407 for details. See - Section 3.1 Estimating the factors. - """ - - def __init__(self, outputs_grads, data_format=None): - """Initializes ConvOutputKroneckerFactor. - - Args: - outputs_grads: List of list of Tensors. Each Tensor is of shape - [batch_size, ..spatial_input_size.., out_channels]. First list index - is source, the second is tower. - data_format: None or str. Format of outputs_grads. - - Raises: - ValueError: If channels are not final dimension. 
- """ - if not utils.is_data_format_channel_last(data_format): - raise ValueError("Channel must be last.") - self._out_channels = outputs_grads[0][0].shape.as_list()[-1] - self._outputs_grads = outputs_grads - super(ConvOutputKroneckerFactor, self).__init__() - - @property - def _var_scope(self): - return "ff_convoutkron_" + scope_string_from_params( - nest.flatten(self._outputs_grads)) - - @property - def _cov_shape(self): - size = self._out_channels - return [size, size] - - @property - def _num_sources(self): - return len(self._outputs_grads) - - @property - def _num_towers(self): - return len(self._outputs_grads[0]) - - @property - def _dtype(self): - return self._outputs_grads[0][0].dtype - - def _compute_new_cov(self, source, tower): - outputs_grad = self._outputs_grads[source][tower] - - # reshaped_tensor below is the matrix DS_l defined in the KFC paper - # (tilde omitted over S for clarity). It has shape M|T| x I, where - # M = minibatch size, |T| = number of spatial locations, and - # I = number of output maps for convolutional layer l. - reshaped_tensor = array_ops.reshape(outputs_grad, [-1, self._out_channels]) - # Following the reasoning in ConvInputKroneckerFactor._compute_new_cov, - # compute_cov here returns 1/M|T| * DS_l^T DS_l = hat{Gamma}_l - # as defined in the paper, with shape I x I. - # (Tilde omitted over S for clarity.) - return compute_cov(reshaped_tensor) - - def _get_data_device(self, tower): - return self._outputs_grads[0][tower].device - - -class FullyConnectedMultiKF(FullyConnectedKroneckerFactor): - """Kronecker factor for a fully connected layer used multiple times.""" - - def __init__(self, - tensors, - num_uses=None, - has_bias=False): - """Constructs a new `FullyConnectedMultiKF`. - - Args: - tensors: List of list of Tensors of shape, each of shape - [num_uses * batch_size, n], and is a reshape version of a Tensor of - shape [num_uses, batch_size, n]. Each of these tensors is usually a - layer's inputs or its output's gradients. 
The first list index is - sources, the second is towers. - num_uses: int. The number of time-steps / uses. - has_bias: bool. If True, '1' is appended to each row. - """ - - self._num_uses = num_uses - - self._cov_dt1 = None - self._make_cov_dt1 = False - self._option1quants_by_damping = {} - self._option2quants_by_damping = {} - self._option1quants_registrations = set() - self._option2quants_registrations = set() - - super(FullyConnectedMultiKF, self).__init__(tensors=tensors, - has_bias=has_bias) - - @property - def _num_timesteps(self): - return self._num_uses - - @property - def _var_scope(self): - return "ff_fc_multi_" + scope_string_from_params( - tuple(nest.flatten(self._tensors)) - + (self._num_timesteps, self._has_bias,)) - - def make_covariance_update_op(self, ema_decay): - - op = super(FullyConnectedMultiKF, self).make_covariance_update_op(ema_decay) - - if self._cov_dt1 is not None: - new_cov_dt1_contribs = [] - for source in range(self._num_sources): - for tower in range(self._num_towers): - with place_on_device(self._get_data_device(tower)): - new_cov_dt1_contribs.append(self._compute_new_cov_dt1(source, - tower)) - - new_cov_dt1 = (math_ops.add_n(new_cov_dt1_contribs) - / float(self._num_towers)) - - # See comments in FisherFactor.make_covariance_update_op() for details. - if utils.on_tpu(): - new_cov_dt1 = utils.cross_replica_mean(new_cov_dt1) - - op2 = moving_averages.assign_moving_average( - self._cov_dt1, new_cov_dt1, ema_decay, zero_debias=ZERO_DEBIAS) - - # TODO(b/69112164): - # It's important that _cov and _cov_dt1 remain consistent with each - # other while the inverse ops are happening. How can we ensure this? - # We will need to add explicit synchronization for this to - # work with asynchronous training. 
- op = control_flow_ops.group(op, op2) - - return op - - def _compute_new_cov_dt1(self, source, tower): # pylint: disable=missing-docstring - tensor = self._tensors[source][tower] - if self._has_bias: - # This appending is technically done twice (the other time is for - # _compute_new_cov()) - tensor = append_homog(tensor) - - total_len = array_ops.shape(tensor)[0] - batch_size = total_len // self._num_timesteps - - tensor_present = tensor[:-batch_size, :] - tensor_future = tensor[batch_size:, :] - - # We specify a normalizer for this computation to ensure a PSD Fisher - # block estimate. This is equivalent to padding with zeros, as was done - # in Section B.2 of the appendix. - return compute_cov( - tensor_future, tensor_right=tensor_present, normalizer=total_len) - - def _get_data_device(self, tower): - return self._tensors[0][tower].device - - @property - def _vec_shape(self): - size = self._tensors[0][0].shape[1] + self._has_bias - return [size] - - def get_option1quants(self, damping_func): - damping_id = graph_func_to_id(damping_func) - return self._option1quants_by_damping[damping_id] - - def get_option2quants(self, damping_func): - damping_id = graph_func_to_id(damping_func) - return self._option2quants_by_damping[damping_id] - - def get_cov_dt1(self): - assert self._cov_dt1 is not None - return self._cov_dt1 - - def register_cov_dt1(self): - self._make_cov_dt1 = True - - def instantiate_cov_variables(self): - super(FullyConnectedMultiKF, self).instantiate_cov_variables() - assert self._cov_dt1 is None - if self._make_cov_dt1: - with variable_scope.variable_scope(self._var_scope): - self._cov_dt1 = variable_scope.get_variable( - "cov_dt1", - initializer=init_ops.zeros_initializer, - shape=self._cov_shape, - trainable=False, - dtype=self._dtype) - - def register_option1quants(self, damping_func): - damping_id = self._register_damping(damping_func) - if damping_id not in self._option1quants_registrations: - self._option1quants_registrations.add(damping_id) - 
- def register_option2quants(self, damping_func): - damping_id = self._register_damping(damping_func) - if damping_id not in self._option2quants_registrations: - self._option2quants_registrations.add(damping_id) - - def instantiate_inv_variables(self): - super(FullyConnectedMultiKF, self).instantiate_inv_variables() - - for damping_id in self._option1quants_registrations: - damping_func = self._damping_funcs_by_id[damping_id] - damping_string = graph_func_to_string(damping_func) - # It's questionable as to whether we should initialize with stuff like - # this at all. Ideally these values should never be used until they are - # updated at least once. - with variable_scope.variable_scope(self._var_scope): - Lmat = variable_scope.get_variable( # pylint: disable=invalid-name - "Lmat_damp{}".format(damping_string), - initializer=inverse_initializer, - shape=self._cov_shape, - trainable=False, - dtype=self._dtype) - psi = variable_scope.get_variable( - "psi_damp{}".format(damping_string), - initializer=init_ops.ones_initializer, - shape=self._vec_shape, - trainable=False, - dtype=self._dtype) - - assert damping_id not in self._option1quants_by_damping - self._option1quants_by_damping[damping_id] = (Lmat, psi) - - for damping_id in self._option2quants_registrations: - damping_func = self._damping_funcs_by_id[damping_id] - damping_string = graph_func_to_string(damping_func) - # It's questionable as to whether we should initialize with stuff like - # this at all. Ideally these values should never be used until they are - # updated at least once. 
- with variable_scope.variable_scope(self._var_scope): - Pmat = variable_scope.get_variable( # pylint: disable=invalid-name - "Lmat_damp{}".format(damping_string), - initializer=inverse_initializer, - shape=self._cov_shape, - trainable=False, - dtype=self._dtype) - Kmat = variable_scope.get_variable( # pylint: disable=invalid-name - "Kmat_damp{}".format(damping_string), - initializer=inverse_initializer, - shape=self._cov_shape, - trainable=False, - dtype=self._dtype) - mu = variable_scope.get_variable( - "mu_damp{}".format(damping_string), - initializer=init_ops.ones_initializer, - shape=self._vec_shape, - trainable=False, - dtype=self._dtype) - - assert damping_id not in self._option2quants_by_damping - self._option2quants_by_damping[damping_id] = (Pmat, Kmat, mu) - - def make_inverse_update_ops(self): - """Create and return update ops corresponding to registered computations.""" - # TODO(b/69918258): Add correctness tests for this method. - # pylint: disable=invalid-name - - ops = [] - - if (len(self._option1quants_by_damping) + - len(self._option2quants_by_damping)): - - # Note that C0 and C1 are stand-ins for A0 and A1, or G0 and G1, from - # the pseudo-code in the original paper. Because the computations for - # the A and G case are essentially the same they can both be performed by - # the same class (this one). - - C1 = self.get_cov_dt1() - - # Get the eigendecomposition of C0 (= self.get_cov()) - eigen_e, eigen_V = self.get_eigendecomp() - - # TODO(b/69678661): Note, there is an implicit assumption here that C1 - # and C0 (as represented here by its eigen-decomp) are consistent. This - # could fail to be the case if self._cov and self._cov_dt1 are not updated - # consistently, or are somehow read between or during the cov updates. - # Can this possibly happen? Is there a way to prevent it? 
- - for damping_id, (Lmat_var, - psi_var) in self._option1quants_by_damping.items(): - - damping = self._damping_funcs_by_id[damping_id]() - damping = math_ops.cast(damping, self._dtype) - - invsqrtC0 = math_ops.matmul( - eigen_V * (eigen_e + damping)**(-0.5), eigen_V, transpose_b=True) - - # Might need to enforce symmetry lost due to numerical issues. - invsqrtC0 = (invsqrtC0 + array_ops.transpose(invsqrtC0)) / 2.0 - - # The following line imposes the symmetry assumed by "Option 1" on C1. - # Strangely the code can work okay with this line commented out, - # depending on how psd_eig is defined. I'm not sure why. - C1 = (C1 + array_ops.transpose(C1)) / 2.0 - - # hPsi = C0^(-1/2) * C1 * C0^(-1/2) (hPsi means hat{Psi}) - hPsi = math_ops.matmul(math_ops.matmul(invsqrtC0, C1), invsqrtC0) - - # Compute the decomposition U*diag(psi)*U^T = hPsi - psi, U = utils.posdef_eig(hPsi) - - # L = C0^(-1/2) * U - Lmat = math_ops.matmul(invsqrtC0, U) - - ops.append(Lmat_var.assign(Lmat)) - ops.append(psi_var.assign(psi)) - - for damping_id, (Pmat_var, Kmat_var, - mu_var) in self._option2quants_by_damping.items(): - - damping = self._damping_funcs_by_id[damping_id]() - damping = math_ops.cast(damping, self._dtype) - - # compute C0^(-1/2) - invsqrtC0 = math_ops.matmul( - eigen_V * (eigen_e + damping)**(-0.5), eigen_V, transpose_b=True) - - # Might need to enforce symmetry lost due to numerical issues. - invsqrtC0 = (invsqrtC0 + array_ops.transpose(invsqrtC0)) / 2.0 - - # Compute the product C0^(-1/2) * C1 - invsqrtC0C1 = math_ops.matmul(invsqrtC0, C1) - - # hPsi = C0^(-1/2) * C1 * C0^(-1/2) (hPsi means hat{Psi}) - hPsi = math_ops.matmul(invsqrtC0C1, invsqrtC0) - - # Compute the decomposition E*diag(mu)*E^T = hPsi^T * hPsi - # Note that we using the notation mu instead of "m" for the eigenvalues. 
- # Instead of computing the product hPsi^T * hPsi and then doing an - # eigen-decomposition of this we just compute the SVD of hPsi and then - # square the singular values to get the eigenvalues. For a justification - # of this approach, see: - # https://en.wikipedia.org/wiki/Singular-value_decomposition#Relation_to_eigenvalue_decomposition - sqrtmu, _, E = linalg_ops.svd(hPsi) - mu = math_ops.square(sqrtmu) - - # Mathematically, the eigenvalues should not should not exceed 1.0, but - # due to numerical issues, or possible issues with inconsistent - # values of C1 and (the eigen-decomposition of) C0 they might. So - # we enforce this condition. - mu = math_ops.minimum(mu, 1.0) - - # P = (C0^(-1/2) * C1)^T * C0^(-1/2) = C_1^T * C_0^(-1) - Pmat = math_ops.matmul(invsqrtC0C1, invsqrtC0, transpose_a=True) - - # K = C_0^(-1/2) * E - Kmat = math_ops.matmul(invsqrtC0, E) - - ops.append(Pmat_var.assign(Pmat)) - ops.append(Kmat_var.assign(Kmat)) - ops.append(mu_var.assign(mu)) - - ops += super(FullyConnectedMultiKF, self).make_inverse_update_ops() - return [control_flow_ops.group(*ops)] - - # pylint: enable=invalid-name diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py b/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py deleted file mode 100644 index 2d8e378a93..0000000000 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""FisherFactor definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.kfac.python.ops.fisher_factors import * -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import - -_allowed_symbols = [ - "inverse_initializer", "covariance_initializer", - "diagonal_covariance_initializer", "scope_string_from_params", - "scope_string_from_name", "scalar_or_tensor_to_string", "FisherFactor", - "InverseProvidingFactor", "FullFactor", "DiagonalFactor", - "NaiveDiagonalFactor", "EmbeddingInputKroneckerFactor", - "FullyConnectedDiagonalFactor", "FullyConnectedKroneckerFactor", - "ConvInputKroneckerFactor", "ConvOutputKroneckerFactor", - "ConvDiagonalFactor", "set_global_constants", "maybe_colocate_with", - "compute_cov", "append_homog" -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py deleted file mode 100644 index 43aa713edc..0000000000 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ /dev/null @@ -1,1269 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Registry for layers and their parameters/variables. - -This represents the collection of all layers in the approximate Fisher -information matrix to which a particular FisherBlock may belong. That is, we -might have several layer collections for one TF graph (if we have multiple K-FAC -optimizers being used, for example.) -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import defaultdict -from collections import OrderedDict -from contextlib import contextmanager -from functools import partial -import warnings - -import math -import six - -from tensorflow.contrib.kfac.python.ops import fisher_blocks as fb -from tensorflow.contrib.kfac.python.ops import loss_functions as lf -from tensorflow.contrib.kfac.python.ops import utils -from tensorflow.python.framework import ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.util import nest - -# Names for various approximations that can be requested for Fisher blocks. 
-APPROX_KRONECKER_NAME = "kron" -APPROX_DIAGONAL_NAME = "diagonal" -APPROX_FULL_NAME = "full" - -_GENERIC_APPROX_TO_BLOCK_TYPES = { - APPROX_FULL_NAME: fb.FullFB, - APPROX_DIAGONAL_NAME: fb.NaiveDiagonalFB, -} - -_FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES = { - APPROX_KRONECKER_NAME: fb.FullyConnectedKFACBasicFB, - APPROX_DIAGONAL_NAME: fb.FullyConnectedDiagonalFB, -} - -_CONV2D_APPROX_TO_BLOCK_TYPES = { - APPROX_KRONECKER_NAME: fb.ConvKFCBasicFB, - APPROX_DIAGONAL_NAME: fb.ConvDiagonalFB, -} - -_EMBEDDING_APPROX_TO_BLOCK_TYPES = { - APPROX_KRONECKER_NAME: fb.EmbeddingKFACFB -} - -APPROX_KRONECKER_INDEP_NAME = "kron_indep" -APPROX_KRONECKER_SERIES_1_NAME = "kron_series_1" -APPROX_KRONECKER_SERIES_2_NAME = "kron_series_2" - -_FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES = { - APPROX_KRONECKER_INDEP_NAME: fb.FullyConnectedMultiIndepFB, - APPROX_KRONECKER_SERIES_1_NAME: partial(fb.FullyConnectedSeriesFB, - option=1), - APPROX_KRONECKER_SERIES_2_NAME: partial(fb.FullyConnectedSeriesFB, - option=2) -} - -_CONV2D_MULTI_APPROX_TO_BLOCK_TYPES = { - APPROX_KRONECKER_INDEP_NAME: fb.ConvKFCBasicMultiIndepFB -} - -_EMBEDDING_MULTI_APPROX_TO_BLOCK_TYPES = { - APPROX_KRONECKER_INDEP_NAME: fb.EmbeddingKFACMultiIndepFB -} - -# Possible value for `reuse` keyword argument. Sets `reuse` to -# tf.get_variable_scope().reuse. -VARIABLE_SCOPE = "VARIABLE_SCOPE" - -_DEFAULT_LAYER_COLLECTION = None - - -def get_default_layer_collection(): - """Get default LayerCollection.""" - if _DEFAULT_LAYER_COLLECTION is None: - raise ValueError( - "Attempted to retrieve default LayerCollection when none is set. 
Use " - "LayerCollection.as_default().") - - return _DEFAULT_LAYER_COLLECTION - - -def set_default_layer_collection(layer_collection): - global _DEFAULT_LAYER_COLLECTION - - if _DEFAULT_LAYER_COLLECTION is not None and layer_collection is not None: - raise ValueError("Default LayerCollection is already set.") - - _DEFAULT_LAYER_COLLECTION = layer_collection - - -class LayerParametersDict(OrderedDict): - """An OrderedDict where keys are Tensors or tuples of Tensors. - - Ensures that no Tensor is associated with two different keys. - """ - - def __init__(self, *args, **kwargs): - self._tensors = set() - super(LayerParametersDict, self).__init__(*args, **kwargs) - - def __setitem__(self, key, value): - key = self._canonicalize_key(key) - tensors = key if isinstance(key, (tuple, list)) else (key,) - key_collisions = self._tensors.intersection(tensors) - if key_collisions: - raise ValueError("Key(s) already present: {}".format(key_collisions)) - self._tensors.update(tensors) - super(LayerParametersDict, self).__setitem__(key, value) - - def __delitem__(self, key): - key = self._canonicalize_key(key) - self._tensors.remove(key) - super(LayerParametersDict, self).__delitem__(key) - - def __getitem__(self, key): - key = self._canonicalize_key(key) - return super(LayerParametersDict, self).__getitem__(key) - - def __contains__(self, key): - key = self._canonicalize_key(key) - return super(LayerParametersDict, self).__contains__(key) - - def _canonicalize_key(self, key): - if isinstance(key, (list, tuple)): - return tuple(key) - return key - - -# TODO(b/68034464): add capability for LayerCollection to be "finalized" -# and do this when it gets used by FisherEstimator / KfacOptimizer. - - -class LayerCollection(object): - """Registry of information about layers and losses. - - Note that you need to create a new one of these for each MatrixEstimator or - KfacOptimizer. 
- - Attributes: - fisher_blocks: a LayersParamsDict (subclass of OrderedDict) mapping layer - parameters (Tensors or tuples of Tensors) to FisherBlock instances. - fisher_factors: an OrderedDict mapping tuples to FisherFactor instances. - losses: a list of LossFunction objects. The loss to be optimized is their - sum. - loss_colocation_ops: ops to colocate loss function evaluations with. These - will typically be the inputs to the losses. - """ - - def __init__(self, - graph=None, - name="LayerCollection"): - warnings.warn( - "tf.contrib.kfac is deprecated and will be removed by 2018-11-01. " - "Use https://pypi.python.org/pypi/kfac instead.") - self.fisher_blocks = LayerParametersDict() - self.fisher_factors = OrderedDict() - self._linked_parameters = dict( - ) # dict mapping sets of variables to optionally specified approximations. - self._graph = graph or ops.get_default_graph() - self._loss_dict = {} # {str: LossFunction} - self._subgraph = None - self._default_generic_approximation = APPROX_DIAGONAL_NAME - self._default_embedding_approximation = APPROX_KRONECKER_NAME - self._default_fully_connected_approximation = APPROX_KRONECKER_NAME - self._default_conv2d_approximation = APPROX_KRONECKER_NAME - self._default_fully_connected_multi_approximation = ( - APPROX_KRONECKER_INDEP_NAME) - self._default_conv2d_multi_approximation = ( - APPROX_KRONECKER_INDEP_NAME) - self._default_embedding_multi_approximation = APPROX_KRONECKER_INDEP_NAME - self.loss_colocation_ops = {} - self._vars_to_uses = defaultdict(lambda: 0) - - with variable_scope.variable_scope(None, default_name=name) as scope: - self._var_scope = scope.name - - @property - def losses(self): - """Tuple of LossFunction objects registered with this LayerCollection.""" - return nest.flatten(self.towers_by_loss) - - @property - def towers_by_loss(self): - """Tuple across losses of LossFunction objects registered to each tower.""" - return tuple(tuple(lst) for lst in self._loss_dict.values()) - - @property - def 
registered_variables(self): - """A tuple of all of the variables currently registered.""" - tuple_of_tuples = (utils.ensure_sequence(key) for key, block - in six.iteritems(self.fisher_blocks)) - flat_tuple = tuple(item for tuple_ in tuple_of_tuples for item in tuple_) - return flat_tuple - - @property - def linked_parameters(self): - """Groups of parameters with an optionally specified approximation. - - Linked parameters can be added using `define_linked_parameters`. - If an approximation is specified, then this approximation will be used - when registering a layer with exactly these parameters, unless an - approximation is specified when calling the registration function. - - Returns: - A `dict` mapping tuples of parameters to an optional string. - """ - return self._linked_parameters - - @property - def default_embedding_approximation(self): - return self._default_embedding_approximation - - def set_default_embedding_approximation(self, value): - if value != APPROX_KRONECKER_NAME: - raise ValueError( - "{} is not a valid approximation for embedding variables.".format( - value)) - self._default_embedding_approximation = value - - @property - def default_generic_approximation(self): - return self._default_generic_approximation - - def set_default_generic_approximation(self, value): - if value not in _GENERIC_APPROX_TO_BLOCK_TYPES: - raise ValueError( - "{} is not a valid approximation for generic variables.".format( - value)) - self._default_generic_approximation = value - - @property - def default_fully_connected_approximation(self): - return self._default_fully_connected_approximation - - def set_default_fully_connected_approximation(self, value): - if value not in _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES: - raise ValueError( - "{} is not a valid approximation for fully connected layers.".format( - value)) - self._default_fully_connected_approximation = value - - @property - def default_conv2d_approximation(self): - return self._default_conv2d_approximation - - 
def set_default_conv2d_approximation(self, value): - if value not in _CONV2D_APPROX_TO_BLOCK_TYPES: - raise ValueError( - "{} is not a valid approximation for 2d convolutional layers.".format( - value)) - self._default_conv2d_approximation = value - - @property - def default_fully_connected_multi_approximation(self): - return self._default_fully_connected_multi_approximation - - def set_default_fully_connected_multi_approximation(self, value): - if value not in _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES: - raise ValueError("{} is not a valid approximation for a fully-connected " - "multi layer.".format(value)) - self._default_fully_connected_multi_approximation = value - - @property - def default_conv2d_multi_approximation(self): - return self._default_conv2d_multi_approximation - - @property - def default_embedding_multi_approximation(self): - return self._default_embedding_multi_approximation - - def register_block(self, layer_key, fisher_block, reuse=VARIABLE_SCOPE): - """Validates and registers the layer_key associated with the fisher_block. - - Args: - layer_key: A variable or tuple of variables. The key to check for in - existing registrations and to register if valid. - fisher_block: The associated `FisherBlock`. - reuse: Method to use for inserting new `FisherBlock's. One of True, False, - or `VARIABLE_SCOPE`. - - Raises: - ValueError: If `layer_key` was already registered and reuse is `False`, - if `layer_key` was registered with a different block type, or if - `layer_key` shares any variables with but is not equal to a previously - registered key. - KeyError: If `reuse` is `True` but `layer_key` was not previously - registered. - - Returns: - The `FisherBlock` registered under `layer_key`. If `layer_key` was already - registered, this will be the previously registered `FisherBlock`. 
- """ - if reuse is VARIABLE_SCOPE: - reuse = variable_scope.get_variable_scope().reuse - - if reuse is True or (reuse is variable_scope.AUTO_REUSE and - layer_key in self.fisher_blocks): - result = self.fisher_blocks[layer_key] - if type(result) != type(fisher_block): # pylint: disable=unidiomatic-typecheck - raise ValueError( - "Attempted to register FisherBlock of type %s when existing " - "FisherBlock has type %s." % (type(fisher_block), type(result))) - return result - if reuse is False and layer_key in self.fisher_blocks: - raise ValueError("FisherBlock for %s is already in LayerCollection." % - (layer_key,)) - - # Insert fisher_block into self.fisher_blocks. - if layer_key in self.fisher_blocks: - raise ValueError("Duplicate registration: {}".format(layer_key)) - # Raise an error if any variable in layer_key has been registered in any - # other blocks. - variable_to_block = { - var: (params, block) - for (params, block) in self.fisher_blocks.items() - for var in utils.ensure_sequence(params) - } - for variable in utils.ensure_sequence(layer_key): - if variable in variable_to_block: - prev_key, prev_block = variable_to_block[variable] - raise ValueError( - "Attempted to register layer_key {} with block {}, but variable {}" - " was already registered in key {} with block {}.".format( - layer_key, fisher_block, variable, prev_key, prev_block)) - self.fisher_blocks[layer_key] = fisher_block - return fisher_block - - def register_loss_function(self, - loss, - colocation_op, - base_name, - name=None, - reuse=VARIABLE_SCOPE): - """Registers a LossFunction object. - - Args: - loss: The LossFunction object. - colocation_op: The op to colocate the loss function's computations with. - base_name: The name to derive a new unique name from is the name argument - is None. - name: (OPTIONAL) str or None. Unique name for this loss function. If None, - a new name is generated. (Default: None) - reuse: (OPTIONAL) bool or str. 
If True, adds `loss` as an additional - tower for the existing loss function. - - Raises: - ValueError: If reuse == True and name == None. - ValueError: If reuse == True and seed != None. - KeyError: If reuse == True and no existing LossFunction with `name` found. - KeyError: If reuse == False and existing LossFunction with `name` found. - """ - - name = name or self._graph.unique_name(base_name) - - if reuse == VARIABLE_SCOPE: - reuse = variable_scope.get_variable_scope().reuse - - if reuse: - if name is None: - raise ValueError( - "If reuse is enabled, loss function's name must be set.") - - loss_list = self._loss_dict.get(name, None) - - if loss_list is None: - raise KeyError( - "Unable to find loss function named {}. Register a new loss " - "function with reuse=False.".format(name)) - else: - if name in self._loss_dict: - raise KeyError( - "Loss function named {} already exists. Set reuse=True to append " - "another tower.".format(name)) - - loss_list = [] - self._loss_dict[name] = loss_list - - loss_list.append(loss) - self.loss_colocation_ops[loss] = colocation_op - - def _get_use_count_map(self): - """Returns a dict mapping variables to their number of registrations.""" - return self._vars_to_uses - - def _add_uses(self, params, uses): - """Register additional uses by params in the graph. - - Args: - params: Variable or tuple of Variables. Parameters for a layer. - uses: int or float. Number of additional uses for these parameters. - """ - params = params if isinstance(params, (tuple, list)) else (params,) - for var in params: - self._vars_to_uses[var] += uses - - def check_registration(self, variables): - """Checks that all variable uses have been registered properly. - - Args: - variables: List of variables. - - Raises: - ValueError: If any registered variables are not included in the list. - ValueError: If any variable in the list is not registered. 
- ValueError: If any variable in the list is registered with the wrong - number of "uses" in the subgraph recorded (vs the number of times that - variable is actually used in the subgraph). - """ - # Note that overlapping parameters (i.e. those that share variables) will - # be caught by layer_collection.LayerParametersDict during registration. - - reg_use_map = self._get_use_count_map() - - error_messages = [] - - for var in variables: - total_uses = self.subgraph.variable_uses(var) - reg_uses = reg_use_map[var] - - if reg_uses == 0: - error_messages.append("Variable {} not registered.".format(var)) - elif (not math.isinf(reg_uses)) and reg_uses != total_uses: - error_messages.append( - "Variable {} registered with wrong number of uses ({} " - "registrations vs {} uses).".format(var, reg_uses, total_uses)) - - num_get_vars = len(reg_use_map) - - if num_get_vars > len(variables): - error_messages.append("{} registered variables were not included in list." - .format(num_get_vars - len(variables))) - - if error_messages: - error_messages = [ - "Found the following errors with variable registration:" - ] + error_messages - raise ValueError("\n\t".join(error_messages)) - - def get_blocks(self): - return self.fisher_blocks.values() - - def get_factors(self): - return self.fisher_factors.values() - - @property - def graph(self): - return self._graph - - @property - def subgraph(self): - return self._subgraph - - def define_linked_parameters(self, params, approximation=None): - """Identify a set of parameters that should be grouped together. - - During automatic graph scanning, any matches containing variables that have - been identified as part of a linked group will be filtered out unless - the match parameters are exactly equal to the ones specified in the linked - group. - - Args: - params: A variable, or a tuple or list of variables. The variables - to be linked. - approximation: Optional string specifying the type of approximation to use - for these variables. 
If unspecified, this layer collection's default - approximation for the layer type will be used. - - Raises: - ValueError: If the parameters were already registered in a layer or - identified as part of an incompatible group. - """ - params = frozenset(utils.ensure_sequence(params)) - - # Check if any of the variables in `params` is already in - # 'self.fisher_blocks.keys()`. - for registered_params, fisher_block in self.fisher_blocks.items(): - registered_params_set = set(utils.ensure_sequence(registered_params)) - for variable in params: - if (variable in registered_params_set and - params != registered_params_set): - raise ValueError( - "Can`t link parameters {}, variable {} was already registered in " - "group {} with layer {}".format(params, variable, - registered_params, fisher_block)) - - # Check if any of the variables in `params` is already in - # 'self.linked_parameters`. - for variable in params: - for other_linked_params in self.linked_parameters: - if variable in other_linked_params: - raise ValueError("Can`t link parameters {}, variable {} was already " - "linked in group {}.".format(params, variable, - other_linked_params)) - self._linked_parameters[params] = approximation - - def create_subgraph(self): - if not self.losses: - raise ValueError("Must have at least one registered loss.") - inputs_to_losses = nest.flatten(tuple(loss.inputs for loss in self.losses)) - self._subgraph = utils.SubGraph(inputs_to_losses) - - def eval_losses(self): - """Return evaluated losses (colocated with inputs to losses).""" - evals = [] - for loss in self.losses: - with ops.colocate_with(self.loss_colocation_ops[loss]): - evals.append(loss.evaluate()) - return evals - - def eval_losses_on_samples(self): - """Return losses evaluated on samples (colocated with inputs to losses).""" - evals = [] - for loss in self.losses: - with ops.colocate_with(self.loss_colocation_ops[loss]): - evals.append(loss.evaluate_on_sample()) - return evals - - def total_loss(self): - return 
math_ops.add_n(self.eval_losses()) - - def total_sampled_loss(self): - return math_ops.add_n(self.eval_losses_on_samples()) - - def _get_linked_approx(self, params): - """If params were linked, return their specified approximation.""" - params_set = frozenset(utils.ensure_sequence(params)) - if params_set in self.linked_parameters: - return self.linked_parameters[params_set] - else: - return None - - def _get_block_type(self, params, approx, default, approx_to_type): - if approx is None: - approx = self._get_linked_approx(params) - if approx is None: - approx = default - - if approx not in approx_to_type: - raise ValueError("Bad value {} for approx.".format(approx)) - - return approx_to_type[approx], approx - - def register_embedding(self, - params, - inputs, - outputs, - approx=None, - reuse=VARIABLE_SCOPE): - """Registers an embedding layer. - - Args: - params: Embedding matrix of shape [vocab_size, embedding_size]. - inputs: Tensor of shape [batch_size, input_size] and dtype int32. Indices - into embedding matrix. - outputs: Tensor of shape [batch_size, embedding_size]. Outputs - produced by layer. - approx: str or None. If not None must be "kron". The Fisher - approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds `inputs` and `outputs` as an - additional mini-batch/tower of data to use when estimating the Fisher - block for this layer (which must have already been registered). If - "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. - (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - KeyError: If reuse == True but no FisherBlock found for `params`. - ValueError: If reuse == True and FisherBlock found but of the wrong type. 
- """ - block_type, approx = self._get_block_type( - params, approx, self.default_embedding_approximation, - _EMBEDDING_APPROX_TO_BLOCK_TYPES) - - if isinstance(params, (tuple, list)): - raise ValueError("Bias not supported.") - vocab_size = int(params.shape[0]) - block = self.register_block( - params, block_type(self, vocab_size), reuse=reuse) - block.register_additional_tower(inputs, outputs) - - self._add_uses(params, 1) - - def register_fully_connected(self, - params, - inputs, - outputs, - approx=None, - reuse=VARIABLE_SCOPE): - """Registers a fully connected layer. - - Args: - params: Tensor or 2-tuple of Tensors corresponding to weight and bias of - this layer. Weight matrix should have shape [input_size, output_size]. - Bias should have shape [output_size]. - inputs: Tensor of shape [batch_size, input_size]. Inputs to layer. - outputs: Tensor of shape [batch_size, output_size]. Outputs - produced by layer. - approx: str or None. If not None must be one of "kron" or "diagonal". - The Fisher approximation to use. If None the default value is used. - (Default: None) - reuse: bool or str. If True, this adds `inputs` and `outputs` as an - additional mini-batch/tower of data to use when estimating the Fisher - block for this layer (which must have already been registered). If - "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. - (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - KeyError: If reuse == True but no FisherBlock found for `params`. - ValueError: If reuse == True and FisherBlock found but of the wrong type. 
- """ - - block_type, approx = self._get_block_type( - params, approx, self.default_fully_connected_approximation, - _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES) - - has_bias = isinstance(params, (tuple, list)) - block = self.register_block(params, block_type(self, has_bias=has_bias), - reuse=reuse) - block.register_additional_tower(inputs, outputs) - - self._add_uses(params, 1) - - def register_conv2d(self, - params, - strides, - padding, - inputs, - outputs, - data_format=None, - dilations=None, - approx=None, - reuse=VARIABLE_SCOPE): - """Registers a call to tf.nn.conv2d(). - - Args: - params: Tensor or 2-tuple of Tensors corresponding to weight and bias of - this layer. Weight matrix should have shape [kernel_height, - kernel_width, in_channels, out_channels]. Bias should have shape - [out_channels]. - strides: List of 4 ints. Strides for convolution kernel. - padding: string. see tf.nn.conv2d for valid values. - inputs: Tensor of shape [batch_size, height, width, in_channels]. Inputs - to layer. - outputs: Tensor of shape [batch_size, height, width, out_channels]. - Output produced by layer. - data_format: str or None. Format of data. - dilations: List of 4 ints. Dilations along each dimension. - approx: str or None. If not None must be one of "kron" or "diagonal". - The Fisher approximation to use. If None the default value is used. - (Default: None) - reuse: bool or str. If True, this adds `inputs` and `outputs` as an - additional mini-batch/tower of data to use when estimating the Fisher - block for this layer (which must have already been registered). If - "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. - (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - KeyError: If reuse == True but no FisherBlock found for `params`. - ValueError: If reuse == True and FisherBlock found but of the wrong type. 
- """ - - block_type, approx = self._get_block_type( - params, approx, self.default_conv2d_approximation, - _CONV2D_APPROX_TO_BLOCK_TYPES) - - # It feels bad to pass in configuration that has to do with the internal - # implementation. And then we can`t use the same constructor for both - # anymore and are thus forced to use this ugly if-statement. - # TODO(b/74793309): Clean this up? - if approx == APPROX_KRONECKER_NAME: - block = self.register_block( - params, - block_type( - layer_collection=self, - params=params, - padding=padding, - strides=strides, - data_format=data_format, - dilation_rate=dilations, - extract_patches_fn="extract_image_patches"), - reuse=reuse) - elif approx == APPROX_DIAGONAL_NAME: - assert strides[0] == strides[-1] == 1 - block = self.register_block( - params, - block_type( - layer_collection=self, - params=params, - padding=padding, - strides=strides, - dilations=dilations, - data_format=data_format), - reuse=reuse) - else: - raise NotImplementedError(approx) - - block.register_additional_tower(inputs, outputs) - - self._add_uses(params, 1) - - def register_convolution(self, - params, - inputs, - outputs, - padding, - strides=None, - dilation_rate=None, - data_format=None, - approx=None, - reuse=VARIABLE_SCOPE): - """Register a call to tf.nn.convolution(). - - Args: - params: Tensor or 2-tuple of Tensors corresponding to weight and bias of - this layer. Weight matrix should have shape [..filter_spatial_size.., - in_channels, out_channels]. Bias should have shape [out_channels]. - inputs: Tensor of shape [batch_size, ..input_spatial_size.., in_channels]. - Inputs to layer. - outputs: Tensor of shape [batch_size, ..output_spatial_size.., - out_channels]. Output produced by layer. - padding: string. see tf.nn.conv2d for valid values. - strides: List of ints of length len(..input_spatial_size..). Strides for - convolution kernel in spatial dimensions. - dilation_rate: List of ints of length len(..input_spatial_size..). 
- Dilations along spatial dimension. - data_format: str or None. Format of data. - approx: str or None. If not None must be one of "kron" or "diagonal". - The Fisher approximation to use. If None the default value is used. - (Default: None) - reuse: bool or str. If True, this adds `inputs` and `outputs` as an - additional mini-batch/tower of data to use when estimating the Fisher - block for this layer (which must have already been registered). If - "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. - (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - KeyError: If reuse == True but no FisherBlock found for `params`. - ValueError: If reuse == True and FisherBlock found but of the wrong type. - """ - # TODO(b/74793309): Have this use _get_block_type like the other - # registration functions? - assert approx is None or approx == APPROX_KRONECKER_NAME - - block = self.register_block( - params, - fb.ConvKFCBasicFB( - layer_collection=self, - params=params, - padding=padding, - strides=strides, - dilation_rate=dilation_rate, - data_format=data_format), - reuse=reuse) - block.register_additional_tower(inputs, outputs) - - self._add_uses(params, 1) - - def register_depthwise_conv2d(self, - params, - inputs, - outputs, - strides, - padding, - rate=None, - data_format=None, - approx=None, - reuse=VARIABLE_SCOPE): - """Register a call to tf.nn.depthwise_conv2d(). - - Args: - params: 4-D Tensor of shape [filter_height, filter_width, - in_channels, channel_multiplier]. Convolutional filter. - inputs: Tensor of shape [batch_size, input_height, input_width, - in_channels]. Inputs to layer. - outputs: Tensor of shape [batch_size, output_height, output_width, - in_channels * channel_multiplier]. Output produced by depthwise conv2d. - strides: List of ints of length 4. Strides along all dimensions. - padding: string. see tf.nn.conv2d for valid values. - rate: None or List of ints of length 2. Dilation rates in spatial - dimensions. 
- data_format: str or None. Format of data. - approx: str or None. If not None must "diagonal". The Fisher - approximation to use. If None the default value is used. (Default: None) - reuse: bool or str. If True, this adds `inputs` and `outputs` as an - additional mini-batch/tower of data to use when estimating the Fisher - block for this layer (which must have already been registered). If - "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. - (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - KeyError: If reuse == True but no FisherBlock found for `params`. - ValueError: If reuse == True and FisherBlock found but of the wrong type. - """ - # TODO(b/74793309): Have this use _get_block_type like the other - # registration functions? - assert approx is None or approx == APPROX_DIAGONAL_NAME - assert data_format in [None, "NHWC"] - - block = self.register_block( - params, - fb.DepthwiseConvDiagonalFB( - layer_collection=self, - params=params, - strides=strides, - padding=padding, - rate=rate, - data_format=data_format), - reuse=reuse) - block.register_additional_tower(inputs, outputs) - - self._add_uses(params, 1) - - def register_separable_conv2d(self, - depthwise_params, - pointwise_params, - inputs, - depthwise_outputs, - pointwise_outputs, - strides, - padding, - rate=None, - data_format=None, - approx=None, - reuse=VARIABLE_SCOPE): - """Register a call to tf.nn.separable_conv2d(). - - Note: This requires access to intermediate outputs between depthwise and - pointwise convolutions. - - Args: - depthwise_params: 4-D Tensor of shape [filter_height, filter_width, - in_channels, channel_multiplier]. Filter for depthwise conv2d. - pointwise_params: 4-D Tensor of shape [1, 1, in_channels * - channel_multiplier, out_channels]. Filter for pointwise conv2d. - inputs: Tensor of shape [batch_size, input_height, input_width, - in_channels]. Inputs to layer. 
- depthwise_outputs: Tensor of shape [batch_size, output_height, - output_width, in_channels * channel_multiplier]. Output produced by - depthwise conv2d. - pointwise_outputs: Tensor of shape [batch_size, output_height, - output_width, out_channels]. Output produced by pointwise conv2d. - strides: List of ints of length 4. Strides for depthwise conv2d kernel in - all dimensions. - padding: string. see tf.nn.conv2d for valid values. - rate: None or List of ints of length 2. Dilation rate of depthwise conv2d - kernel in spatial dimensions. - data_format: str or None. Format of data. - approx: str or None. If not None must be one of "kron" or "diagonal". - The Fisher approximation to use. If None the default value is used. - (Default: None) - reuse: bool or str. If True, this adds `inputs` and `outputs` as an - additional mini-batch/tower of data to use when estimating the Fisher - block for this layer (which must have already been registered). If - "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. - (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - KeyError: If reuse == True but no FisherBlock found for `params`. - ValueError: If reuse == True and FisherBlock found but of the wrong type. - """ - self.register_depthwise_conv2d( - params=depthwise_params, - inputs=inputs, - outputs=depthwise_outputs, - strides=strides, - padding=padding, - rate=rate, - data_format=data_format, - approx=APPROX_DIAGONAL_NAME, - reuse=reuse) - - self.register_conv2d( - params=pointwise_params, - inputs=depthwise_outputs, - outputs=pointwise_outputs, - strides=[1, 1, 1, 1], - padding="VALID", - data_format=data_format, - approx=approx, - reuse=reuse) - - def register_generic(self, - params, - batch_size, - approx=None, - reuse=VARIABLE_SCOPE): - """Registers a generic layer. - - Args: - params: Tensor or tuple of Tensors corresponding to the parameters. - batch_size: 0-D Tensor. Size of the minibatch (for this tower). - approx: str or None. 
It not None, must be one of "full" or "diagonal". - The Fisher approximation to use. If None the default value is used. - (Default: None) - reuse: bool or str. If True, this adds `batch_size` to the total - mini-batch size use when estimating the Fisher block for this layer - (which must have already been registered). If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - KeyError: If reuse == True but no FisherBlock found for `params`. - ValueError: If reuse == True and FisherBlock found but of the wrong type. - """ - block_type, approx = self._get_block_type( - params, approx, self.default_generic_approximation, - _GENERIC_APPROX_TO_BLOCK_TYPES) - - block = self.register_block(params, block_type(self, params), reuse=reuse) - block.register_additional_tower(batch_size) - - self._add_uses(params, float("inf")) - - def register_fully_connected_multi(self, params, inputs, outputs, - num_uses=None, approx=None, - reuse=VARIABLE_SCOPE): - """Register fully connected layers with shared parameters. - - This can handle general fully-connected layers with shared parameters, but - has specialized approximations to deal with the case where there is a - meaningful linear order to the share instances (such as in an RNN). - - Args: - params: Tensor or 2-tuple of Tensors corresponding to weight and bias of - this layer. Weight matrix should have shape [input_size, output_size]. - Bias should have shape [output_size]. - inputs: A list of Tensors, each of shape [batch_size, input_size]. Inputs - to layer. The list indexes each use in the graph (which might - correspond to a "time-step" in an RNN). OR, can be single Tensor, of - shape [num_uses * batch_size , input_size], which is a reshaped version - of a Tensor of shape [num_uses, batch_size, input_size]. - outputs: A list of Tensors, the same length as `inputs`, each of shape - [batch_size, output_size]. Outputs produced by layer. 
The list indexes - each use in the graph (which might correspond to a "time-step" in an - RNN). Needs to correspond with the order used in `inputs`. OR, can be - a single Tensor of shape [num_uses * batch_size, output_size], which is - a reshaped version of a Tensor of shape [num_uses, batch_size, - output_size]. - num_uses: int or None. The number uses/time-steps in the graph where the - layer appears. Only needed if both inputs and outputs are given in the - single Tensor format. (Default: None) - approx: str or None. If not None, must be of "kron_indep", "kron_series_1" - or "kron_series_2". The Fisher approximation to use. If None the default - value is used. (Default: None) - reuse: bool or str. If True, this adds `inputs` and `outputs` as an - additional mini-batch/tower of data to use when estimating the Fisher - block for this layer (which must have already been registered). If - "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the - word `use` here has a completely different meaning to "use in the graph" - as it pertains to the `inputs`, `outputs`, and `num_uses` arguments.) - (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - """ - block_type, approx = self._get_block_type( - params, approx, self.default_fully_connected_multi_approximation, - _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES) - - # TODO(b/70283649): something along the lines of find_canonical_output - # should be added back in here (and for the other block types, arguably). 
- - has_bias = isinstance(params, (tuple, list)) - block = self.register_block(params, block_type(self, has_bias=has_bias, - num_uses=num_uses), - reuse=reuse) - block.register_additional_tower(inputs, outputs) - if isinstance(inputs, (tuple, list)): - assert len(inputs) == len(outputs) - self._add_uses(params, len(inputs)) - else: - self._add_uses(params, 1) - - def register_conv2d_multi(self, - params, - strides, - padding, - inputs, - outputs, - num_uses=None, - data_format=None, - dilations=None, - approx=None, - reuse=VARIABLE_SCOPE): - """Registers convolutional layers with shared parameters. - - Args: - params: Tensor or 2-tuple of Tensors corresponding to weight and bias of - this layer. Weight matrix should have shape [kernel_height, - kernel_width, in_channels, out_channels]. Bias should have shape - [out_channels]. - strides: 1-D Tensor of length 4. Strides for convolution kernel. - padding: string. see tf.nn.conv2d for valid values. - inputs: A list of Tensors, each of shape [batch_size, height, width, - in_channels]. Inputs to layer. The list indexes each use in the graph - (which might correspond to a "time-step" in an RNN). OR, can be single - Tensor, of shape [num_uses * batch_size, height, width, in_channels], - which is a reshaped version of a Tensor of shape [num_uses, batch_size, - height, width, in_channels]. - outputs: A list of Tensors, each of shape [batch_size, height, width, - out_channels]. Output produced by layer. The list indexes each use - in the graph (which might correspond to a "time-step" in an RNN). - Needs to correspond with the order used in `inputs`. OR, can be a - single Tensor, of shape [num_uses * batch_size, height, width, - out_channels], which is a reshaped version of a Tensor of shape - [num_uses, batch_size, height, width, out_channels]. - num_uses: int or None. The number uses/time-steps in the graph where the - layer appears. Only needed if both inputs and outputs are given in the - single Tensor format. 
(Default: None) - data_format: str or None. Format of data. - dilations: List of 4 ints. Dilations along each dimension. - approx: str or None. If not None must by "kron_indep". The Fisher - approximation to use. If None the default value is used. - (Default: None) - reuse: bool or str. If True, this adds `inputs` and `outputs` as an - additional mini-batch/tower of data to use when estimating the Fisher - block for this layer (which must have already been registered). If - "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the - word `use` here has a completely different meaning to "use in the graph" - as it pertains to the `inputs`, `outputs`, and `num_uses` arguments.) - (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - KeyError: If reuse == True but no FisherBlock found for `params`. - ValueError: If reuse == True and FisherBlock found but of the wrong type. - """ - block_type, approx = self._get_block_type( - params, approx, self.default_conv2d_multi_approximation, - _CONV2D_MULTI_APPROX_TO_BLOCK_TYPES) - - block = self.register_block( - params, - block_type( - layer_collection=self, - params=params, - padding=padding, - strides=strides, - data_format=data_format, - dilation_rate=dilations, - extract_patches_fn="extract_image_patches", - num_uses=num_uses), - reuse=reuse) - - block.register_additional_tower(inputs, outputs) - if isinstance(inputs, (tuple, list)): - assert len(inputs) == len(outputs) - self._add_uses(params, len(inputs)) - else: - self._add_uses(params, 1) - - # TODO(b/74108452): change the loss registration functions names to refer - # to "loss functions" instead of distributions. Following naming convention - # of the loss function classes themselves. - - def register_embedding_multi(self, - params, - inputs, - outputs, - num_uses=None, - approx=None, - reuse=VARIABLE_SCOPE): - """Registers embedding layers with shared parameters. 
- - Args: - params: Embedding matrix of shape [vocab_size, embedding_size]. - inputs: A list of Tensors, each of shape [batch_size, input_size] and - dtype int32. Indices into embedding matrix. The list indexes each use - in the graph (which might correspond to a "time-step" in an RNN). - OR, can be single Tensor, of shape [num_uses*batch_size, input_size], - which is a reshaped version of a Tensor of shape [num_uses, batch_size, - input_size]. - outputs: A list of Tensors, each of shape [batch_size, embedding_size]. - Outputs produced by layer. The list indexes each use in the graph - (which might correspond to a "time-step" in an RNN). Needs to - correspond with the order used in `inputs`. OR, can be a - single Tensor, of shape [num_uses * batch_size, embedding_size], which - is a reshaped version of a Tensor of shape [num_uses, batch_size, - embedding_size]. - num_uses: int or None. The number uses/time-steps in the graph where the - layer appears. Only needed if both inputs and outputs are given in the - single Tensor format. (Default: None) - approx: str or None. If not None must by "kron_indep". The Fisher - approximation to use. If None the default value is used. - (Default: None) - reuse: bool or str. If True, this adds `inputs` and `outputs` as an - additional mini-batch/tower of data to use when estimating the Fisher - block for this layer (which must have already been registered). If - "VARIABLE_SCOPE", use tf.get_variable_scope().reuse. (Note that the - word `use` here has a completely different meaning to "use in the graph" - as it pertains to the `inputs`, `outputs`, and `num_uses` arguments.) - (Default: "VARIABLE_SCOPE") - - Raises: - ValueError: For improper value to `approx`. - KeyError: If reuse == True but no FisherBlock found for `params`. - ValueError: If reuse == True and FisherBlock found but of the wrong type. 
- """ - block_type, approx = self._get_block_type( - params, approx, self.default_embedding_multi_approximation, - _EMBEDDING_MULTI_APPROX_TO_BLOCK_TYPES) - - if isinstance(params, (tuple, list)): - raise ValueError("Bias not supported.") - vocab_size = int(params.shape[0]) - - block = self.register_block( - params, block_type(self, vocab_size, num_uses=num_uses), reuse=reuse) - block.register_additional_tower(inputs, outputs) - - if isinstance(inputs, (tuple, list)): - self._add_uses(params, len(inputs)) - else: - self._add_uses(params, 1) - - def register_categorical_predictive_distribution(self, - logits, - seed=None, - targets=None, - name=None, - reuse=VARIABLE_SCOPE): - """Registers a categorical predictive distribution. - - Args: - logits: The logits of the distribution (i.e. its parameters). - seed: The seed for the RNG (for debugging) (Default: None) - targets: (OPTIONAL) The targets for the loss function. Only required if - one wants to call total_loss() instead of total_sampled_loss(). - total_loss() is required, for example, to estimate the - "empirical Fisher" (instead of the true Fisher). - (Default: None) - name: (OPTIONAL) str or None. Unique name for this loss function. If None, - a new name is generated. (Default: None) - reuse: bool or str. If True, this adds `logits` as an additional - mini-batch/tower of inputs to the loss-function/predictive distribution - (which must have already been registered). If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") - """ - loss = lf.CategoricalLogitsNegativeLogProbLoss(logits, targets=targets, - seed=seed) - self.register_loss_function(loss, logits, - "categorical_predictive_distribution", - name=name, reuse=reuse) - - def register_normal_predictive_distribution(self, - mean, - var=0.5, - seed=None, - targets=None, - name=None, - reuse=VARIABLE_SCOPE): - """Registers a normal predictive distribution. - - Args: - mean: The mean vector defining the distribution. 
- var: The variance (must be a scalar). Note that the default value of - 0.5 corresponds to a standard squared error loss (target - - prediction)**2. If your squared error loss is of the form - 0.5*(target - prediction)**2 you should use var=1.0. (Default: 0.5) - seed: The seed for the RNG (for debugging) (Default: None) - targets: (OPTIONAL) The targets for the loss function. Only required if - one wants to call total_loss() instead of total_sampled_loss(). - total_loss() is required, for example, to estimate the - "empirical Fisher" (instead of the true Fisher). - (Default: None) - name: (OPTIONAL) str or None. Unique name for this loss function. If None, - a new name is generated. (Default: None) - reuse: bool or str. If True, this adds `mean` and `var` as an additional - mini-batch/tower of inputs to the loss-function/predictive distribution - (which must have already been registered). If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") - """ - loss = lf.NormalMeanNegativeLogProbLoss(mean, var, targets=targets, - seed=seed) - self.register_loss_function(loss, mean, - "normal_predictive_distribution", - name=name, reuse=reuse) - - def register_multi_bernoulli_predictive_distribution(self, - logits, - seed=None, - targets=None, - name=None, - reuse=VARIABLE_SCOPE): - """Registers a multi-Bernoulli predictive distribution. - - Args: - logits: The logits of the distribution (i.e. its parameters). - seed: The seed for the RNG (for debugging) (Default: None) - targets: (OPTIONAL) The targets for the loss function. Only required if - one wants to call total_loss() instead of total_sampled_loss(). - total_loss() is required, for example, to estimate the - "empirical Fisher" (instead of the true Fisher). - (Default: None) - name: (OPTIONAL) str or None. Unique name for this loss function. If None, - a new name is generated. (Default: None) - reuse: bool or str. 
If True, this adds `logits` as an additional - mini-batch/tower of inputs to the loss-function/predictive distribution - (which must have already been registered). If "VARIABLE_SCOPE", use - tf.get_variable_scope().reuse. (Default: "VARIABLE_SCOPE") - """ - loss = lf.MultiBernoulliNegativeLogProbLoss(logits, targets=targets, - seed=seed) - self.register_loss_function(loss, logits, - "multi_bernoulli_predictive_distribution", - name=name, reuse=reuse) - - def make_or_get_factor(self, cls, args): - """Insert `cls(args)` into 'self.fisher_factors` if not already present. - - Wraps constructor in `tf.variable_scope()` to ensure variables constructed - in `cls.__init__` are placed under this LayerCollection's scope. - - Args: - cls: Class that implements FisherFactor. - args: Tuple of arguments to pass into `cls's constructor. Must be - hashable. - - Returns: - Instance of `cls` found in self.fisher_factors. - """ - try: - hash(args) - except TypeError: - raise TypeError( - ("Unable to use (cls, args) = ({}, {}) as a key in " - "LayerCollection.fisher_factors. The pair cannot be hashed.").format( - cls, args)) - - key = cls, args - if key not in self.fisher_factors: - with variable_scope.variable_scope(self._var_scope): - self.fisher_factors[key] = cls(*args) - return self.fisher_factors[key] - - @contextmanager - def as_default(self): - """Sets this LayerCollection as the default.""" - set_default_layer_collection(self) - yield - set_default_layer_collection(None) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py b/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py deleted file mode 100644 index 9f46853807..0000000000 --- a/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Registry for layers and their parameters/variables. - -This represents the collection of all layers in the approximate Fisher -information matrix to which a particular FisherBlock may belong. That is, we -might have several layer collections for one TF graph (if we have multiple K-FAC -optimizers being used, for example.) -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.kfac.python.ops.layer_collection import * -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import - -_allowed_symbols = [ - "get_default_layer_collection", - "set_default_layer_collection", - "LayerParametersDict", - "LayerCollection", - "APPROX_KRONECKER_NAME", - "APPROX_DIAGONAL_NAME", - "APPROX_FULL_NAME", - "VARIABLE_SCOPE", - "APPROX_KRONECKER_INDEP_NAME", - "APPROX_KRONECKER_SERIES_1_NAME", - "APPROX_KRONECKER_SERIES_2_NAME" -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/linear_operator.py b/tensorflow/contrib/kfac/python/ops/linear_operator.py deleted file mode 100644 index 61cb955ae8..0000000000 --- a/tensorflow/contrib/kfac/python/ops/linear_operator.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""SmartMatrices definitions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.kfac.python.ops import utils -from tensorflow.python.framework import ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.linalg import linalg -from tensorflow.python.ops.linalg import linalg_impl -from tensorflow.python.ops.linalg import linear_operator_util as lou - - -class LinearOperatorExtras(object): # pylint: disable=missing-docstring - - def matmul(self, x, adjoint=False, adjoint_arg=False, name="matmul"): - - with self._name_scope(name, values=[x]): - if isinstance(x, ops.IndexedSlices): - return self._matmul_sparse(x, adjoint=adjoint, adjoint_arg=adjoint_arg) - - x = ops.convert_to_tensor(x, name="x") - self._check_input_dtype(x) - - self_dim = -2 if adjoint else -1 - arg_dim = -1 if adjoint_arg else -2 - self.shape[self_dim].assert_is_compatible_with(x.get_shape()[arg_dim]) - - return self._matmul(x, adjoint=adjoint, adjoint_arg=adjoint_arg) - - def matmul_right(self, x, adjoint=False, adjoint_arg=False, name="matmul"): - - with self._name_scope(name, values=[x]): - - if isinstance(x, ops.IndexedSlices): - return self._matmul_right_sparse( - x, adjoint=adjoint, adjoint_arg=adjoint_arg) - - x = ops.convert_to_tensor(x, name="x") - 
self._check_input_dtype(x) - - self_dim = -1 if adjoint else -2 - arg_dim = -2 if adjoint_arg else -1 - self.shape[self_dim].assert_is_compatible_with(x.get_shape()[arg_dim]) - - return self._matmul_right(x, adjoint=adjoint, adjoint_arg=adjoint_arg) - - -class LinearOperatorFullMatrix(LinearOperatorExtras, - linalg.LinearOperatorFullMatrix): - - # TODO(b/78117889) Remove this definition once core LinearOperator - # has _matmul_right. - def _matmul_right(self, x, adjoint=False, adjoint_arg=False): - return lou.matmul_with_broadcast( - x, self._matrix, adjoint_a=adjoint_arg, adjoint_b=adjoint) - - def _matmul_sparse(self, x, adjoint=False, adjoint_arg=False): - raise NotImplementedError - - def _matmul_right_sparse(self, x, adjoint=False, adjoint_arg=False): - assert not adjoint and not adjoint_arg - return utils.matmul_sparse_dense(x, self._matrix) - - -class LinearOperatorDiag(LinearOperatorExtras, # pylint: disable=missing-docstring - linalg.LinearOperatorDiag): - - def _matmul_right(self, x, adjoint=False, adjoint_arg=False): - diag_mat = math_ops.conj(self._diag) if adjoint else self._diag - x = linalg_impl.adjoint(x) if adjoint_arg else x - return diag_mat * x - - def _matmul_sparse(self, x, adjoint=False, adjoint_arg=False): - diag_mat = math_ops.conj(self._diag) if adjoint else self._diag - assert not adjoint_arg - return utils.matmul_diag_sparse(diag_mat, x) - - def _matmul_right_sparse(self, x, adjoint=False, adjoint_arg=False): - raise NotImplementedError diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions.py b/tensorflow/contrib/kfac/python/ops/loss_functions.py deleted file mode 100644 index c8cebc42cb..0000000000 --- a/tensorflow/contrib/kfac/python/ops/loss_functions.py +++ /dev/null @@ -1,754 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Loss functions to be used by LayerCollection.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc - -import six - -from tensorflow.contrib.distributions.python.ops import onehot_categorical -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.distributions import bernoulli -from tensorflow.python.ops.distributions import categorical -from tensorflow.python.ops.distributions import normal - - -@six.add_metaclass(abc.ABCMeta) -class LossFunction(object): - """Abstract base class for loss functions. - - Note that unlike typical loss functions used in neural networks these are - summed and not averaged across cases in the batch, since this is what the - users of this class (FisherEstimator and MatrixVectorProductComputer) will - be expecting. The implication of this is that you will may want to - normalize things like Fisher-vector products by the batch size when you - use this class. It depends on the use case. - """ - - @abc.abstractproperty - def targets(self): - """The targets being predicted by the model. - - Returns: - None or Tensor of appropriate shape for calling self._evaluate() on. 
- """ - pass - - @abc.abstractproperty - def inputs(self): - """The inputs to the loss function (excluding the targets).""" - pass - - def evaluate(self): - """Evaluate the loss function on the targets.""" - if self.targets is not None: - # We treat the targets as "constant". It's only the inputs that get - # "back-propped" through. - return self._evaluate(array_ops.stop_gradient(self.targets)) - else: - raise Exception("Cannot evaluate losses with unspecified targets.") - - @abc.abstractmethod - def _evaluate(self, targets): - """Evaluates the negative log probability of the targets. - - Args: - targets: Tensor that distribution can calculate log_prob() of. - - Returns: - negative log probability of each target, summed across all targets. - """ - pass - - @abc.abstractmethod - def multiply_hessian(self, vector): - """Right-multiply a vector by the Hessian. - - Here the 'Hessian' is the Hessian matrix (i.e. matrix of 2nd-derivatives) - of the loss function with respect to its inputs. - - Args: - vector: The vector to multiply. Must be the same shape(s) as the - 'inputs' property. - - Returns: - The vector right-multiplied by the Hessian. Will be of the same shape(s) - as the 'inputs' property. - """ - pass - - @abc.abstractmethod - def multiply_hessian_factor(self, vector): - """Right-multiply a vector by a factor B of the Hessian. - - Here the 'Hessian' is the Hessian matrix (i.e. matrix of 2nd-derivatives) - of the loss function with respect to its inputs. Typically this will be - block-diagonal across different cases in the batch, since the loss function - is typically summed across cases. - - Note that B can be any matrix satisfying B * B^T = H where H is the Hessian, - but will agree with the one used in the other methods of this class. - - Args: - vector: The vector to multiply. Must be of the shape given by the - 'hessian_factor_inner_shape' property. - - Returns: - The vector right-multiplied by B. Will be of the same shape(s) as the - 'inputs' property. 
- """ - pass - - @abc.abstractmethod - def multiply_hessian_factor_transpose(self, vector): - """Right-multiply a vector by the transpose of a factor B of the Hessian. - - Here the 'Hessian' is the Hessian matrix (i.e. matrix of 2nd-derivatives) - of the loss function with respect to its inputs. Typically this will be - block-diagonal across different cases in the batch, since the loss function - is typically summed across cases. - - Note that B can be any matrix satisfying B * B^T = H where H is the Hessian, - but will agree with the one used in the other methods of this class. - - Args: - vector: The vector to multiply. Must be the same shape(s) as the - 'inputs' property. - - Returns: - The vector right-multiplied by B^T. Will be of the shape given by the - 'hessian_factor_inner_shape' property. - """ - pass - - @abc.abstractmethod - def multiply_hessian_factor_replicated_one_hot(self, index): - """Right-multiply a replicated-one-hot vector by a factor B of the Hessian. - - Here the 'Hessian' is the Hessian matrix (i.e. matrix of 2nd-derivatives) - of the loss function with respect to its inputs. Typically this will be - block-diagonal across different cases in the batch, since the loss function - is typically summed across cases. - - A 'replicated-one-hot' vector means a tensor which, for each slice along the - batch dimension (assumed to be dimension 0), is 1.0 in the entry - corresponding to the given index and 0 elsewhere. - - Note that B can be any matrix satisfying B * B^T = H where H is the Hessian, - but will agree with the one used in the other methods of this class. - - Args: - index: A tuple representing in the index of the entry in each slice that - is 1.0. Note that len(index) must be equal to the number of elements - of the 'hessian_factor_inner_shape' tensor minus one. - - Returns: - The vector right-multiplied by B^T. Will be of the same shape(s) as the - 'inputs' property. 
- """ - pass - - @abc.abstractproperty - def hessian_factor_inner_shape(self): - """The shape of the tensor returned by multiply_hessian_factor.""" - pass - - @abc.abstractproperty - def hessian_factor_inner_static_shape(self): - """Static version of hessian_factor_inner_shape.""" - pass - - -@six.add_metaclass(abc.ABCMeta) -class NegativeLogProbLoss(LossFunction): - """Abstract base class for loss functions that are negative log probs.""" - - def __init__(self, seed=None): - self._default_seed = seed - super(NegativeLogProbLoss, self).__init__() - - @property - def inputs(self): - return self.params - - @abc.abstractproperty - def params(self): - """Parameters to the underlying distribution.""" - pass - - @abc.abstractmethod - def multiply_fisher(self, vector): - """Right-multiply a vector by the Fisher. - - Args: - vector: The vector to multiply. Must be the same shape(s) as the - 'inputs' property. - - Returns: - The vector right-multiplied by the Fisher. Will be of the same shape(s) - as the 'inputs' property. - """ - pass - - @abc.abstractmethod - def multiply_fisher_factor(self, vector): - """Right-multiply a vector by a factor B of the Fisher. - - Here the 'Fisher' is the Fisher information matrix (i.e. expected outer- - product of gradients) with respect to the parameters of the underlying - probability distribution (whose log-prob defines the loss). Typically this - will be block-diagonal across different cases in the batch, since the - distribution is usually (but not always) conditionally iid across different - cases. - - Note that B can be any matrix satisfying B * B^T = F where F is the Fisher, - but will agree with the one used in the other methods of this class. - - Args: - vector: The vector to multiply. Must be of the shape given by the - 'fisher_factor_inner_shape' property. - - Returns: - The vector right-multiplied by B. Will be of the same shape(s) as the - 'inputs' property. 
- """ - pass - - @abc.abstractmethod - def multiply_fisher_factor_transpose(self, vector): - """Right-multiply a vector by the transpose of a factor B of the Fisher. - - Here the 'Fisher' is the Fisher information matrix (i.e. expected outer- - product of gradients) with respect to the parameters of the underlying - probability distribution (whose log-prob defines the loss). Typically this - will be block-diagonal across different cases in the batch, since the - distribution is usually (but not always) conditionally iid across different - cases. - - Note that B can be any matrix satisfying B * B^T = F where F is the Fisher, - but will agree with the one used in the other methods of this class. - - Args: - vector: The vector to multiply. Must be the same shape(s) as the - 'inputs' property. - - Returns: - The vector right-multiplied by B^T. Will be of the shape given by the - 'fisher_factor_inner_shape' property. - """ - pass - - @abc.abstractmethod - def multiply_fisher_factor_replicated_one_hot(self, index): - """Right-multiply a replicated-one-hot vector by a factor B of the Fisher. - - Here the 'Fisher' is the Fisher information matrix (i.e. expected outer- - product of gradients) with respect to the parameters of the underlying - probability distribution (whose log-prob defines the loss). Typically this - will be block-diagonal across different cases in the batch, since the - distribution is usually (but not always) conditionally iid across different - cases. - - A 'replicated-one-hot' vector means a tensor which, for each slice along the - batch dimension (assumed to be dimension 0), is 1.0 in the entry - corresponding to the given index and 0 elsewhere. - - Note that B can be any matrix satisfying B * B^T = H where H is the Fisher, - but will agree with the one used in the other methods of this class. - - Args: - index: A tuple representing in the index of the entry in each slice that - is 1.0. 
Note that len(index) must be equal to the number of elements - of the 'fisher_factor_inner_shape' tensor minus one. - - Returns: - The vector right-multiplied by B. Will be of the same shape(s) as the - 'inputs' property. - """ - pass - - @abc.abstractproperty - def fisher_factor_inner_shape(self): - """The shape of the tensor returned by multiply_fisher_factor.""" - pass - - @abc.abstractproperty - def fisher_factor_inner_static_shape(self): - """Static version of fisher_factor_inner_shape.""" - pass - - @abc.abstractmethod - def sample(self, seed): - """Sample 'targets' from the underlying distribution.""" - pass - - def evaluate_on_sample(self, seed=None): - """Evaluates the log probability on a random sample. - - Args: - seed: int or None. Random seed for this draw from the distribution. - - Returns: - Log probability of sampled targets, summed across examples. - """ - if seed is None: - seed = self._default_seed - # We treat the targets as "constant". It's only the inputs that get - # "back-propped" through. - return self._evaluate(array_ops.stop_gradient(self.sample(seed))) - - -# TODO(jamesmartens): should this just inherit from object to avoid "diamond" -# inheritance, or is there a better way? -class NaturalParamsNegativeLogProbLoss(NegativeLogProbLoss): - """Base class for neg log prob losses whose inputs are 'natural' parameters. - - Note that the Hessian and Fisher for natural parameters of exponential- - family models are the same, hence the purpose of this class. - See here: https://arxiv.org/abs/1412.1193 - - 'Natural parameters' are defined for exponential-family models. 
See for - example: https://en.wikipedia.org/wiki/Exponential_family - """ - - def multiply_hessian(self, vector): - return self.multiply_fisher(vector) - - def multiply_hessian_factor(self, vector): - return self.multiply_fisher_factor(vector) - - def multiply_hessian_factor_transpose(self, vector): - return self.multiply_fisher_factor_transpose(vector) - - def multiply_hessian_factor_replicated_one_hot(self, index): - return self.multiply_fisher_factor_replicated_one_hot(index) - - @property - def hessian_factor_inner_shape(self): - return self.fisher_factor_inner_shape - - @property - def hessian_factor_inner_static_shape(self): - return self.fisher_factor_inner_shape - - -class DistributionNegativeLogProbLoss(NegativeLogProbLoss): - """Base class for neg log prob losses that use the TF Distribution classes.""" - - def __init__(self, seed=None): - super(DistributionNegativeLogProbLoss, self).__init__(seed=seed) - - @abc.abstractproperty - def dist(self): - """The underlying tf.distributions.Distribution.""" - pass - - def _evaluate(self, targets): - return -math_ops.reduce_sum(self.dist.log_prob(targets)) - - def sample(self, seed): - return self.dist.sample(seed=seed) - - -class NormalMeanNegativeLogProbLoss(DistributionNegativeLogProbLoss, - NaturalParamsNegativeLogProbLoss): - """Neg log prob loss for a normal distribution parameterized by a mean vector. - - - Note that the covariance is treated as a constant 'var' times the identity. - Also note that the Fisher for such a normal distribution with respect the mean - parameter is given by: - - F = (1/var) * I - - See for example https://www.ii.pwr.edu.pl/~tomczak/PDF/[JMT]Fisher_inf.pdf. 
- """ - - def __init__(self, mean, var=0.5, targets=None, seed=None): - self._mean = mean - self._var = var - self._targets = targets - super(NormalMeanNegativeLogProbLoss, self).__init__(seed=seed) - - @property - def targets(self): - return self._targets - - @property - def dist(self): - return normal.Normal(loc=self._mean, scale=math_ops.sqrt(self._var)) - - @property - def params(self): - return self._mean - - def multiply_fisher(self, vector): - return (1. / self._var) * vector - - def multiply_fisher_factor(self, vector): - return self._var**-0.5 * vector - - def multiply_fisher_factor_transpose(self, vector): - return self.multiply_fisher_factor(vector) # it's symmetric in this case - - def multiply_fisher_factor_replicated_one_hot(self, index): - assert len(index) == 1, "Length of index was {}".format(len(index)) - ones_slice = array_ops.expand_dims( - array_ops.ones(array_ops.shape(self._mean)[:1], dtype=self._mean.dtype), - axis=-1) - output_slice = self._var**-0.5 * ones_slice - return insert_slice_in_zeros(output_slice, 1, int(self._mean.shape[1]), - index[0]) - - @property - def fisher_factor_inner_shape(self): - return array_ops.shape(self._mean) - - @property - def fisher_factor_inner_static_shape(self): - return self._mean.shape - - -class NormalMeanVarianceNegativeLogProbLoss(DistributionNegativeLogProbLoss): - """Negative log prob loss for a normal distribution with mean and variance. - - This class parameterizes a multivariate normal distribution with n independent - dimensions. Unlike `NormalMeanNegativeLogProbLoss`, this class does not - assume the variance is held constant. The Fisher Information for n = 1 - is given by, - - F = [[1 / variance, 0], - [ 0, 0.5 / variance^2]] - - where the parameters of the distribution are concatenated into a single - vector as [mean, variance]. For n > 1, the mean parameter vector is - concatenated with the variance parameter vector. 
- - See https://www.ii.pwr.edu.pl/~tomczak/PDF/[JMT]Fisher_inf.pdf for derivation. - """ - - def __init__(self, mean, variance, targets=None, seed=None): - assert len(mean.shape) == 2, "Expect 2D mean tensor." - assert len(variance.shape) == 2, "Expect 2D variance tensor." - self._mean = mean - self._variance = variance - self._targets = targets - super(NormalMeanVarianceNegativeLogProbLoss, self).__init__(seed=seed) - - @property - def targets(self): - return self._targets - - @property - def dist(self): - return normal.Normal(loc=self._mean, scale=math_ops.sqrt(self._variance)) - - @property - def params(self): - return self._mean, self._variance - - def _concat(self, mean, variance): - return array_ops.concat([mean, variance], axis=-1) - - def _split(self, params): - return array_ops.split(params, 2, axis=-1) - - @property - def _fisher_mean(self): - return 1. / self._variance - - @property - def _fisher_mean_factor(self): - return 1. / math_ops.sqrt(self._variance) - - @property - def _fisher_var(self): - return 1. / (2 * math_ops.square(self._variance)) - - @property - def _fisher_var_factor(self): - return 1. / (math_ops.sqrt(2.) * self._variance) - - def multiply_fisher(self, vecs): - mean_vec, var_vec = vecs - return (self._fisher_mean * mean_vec, self._fisher_var * var_vec) - - def multiply_fisher_factor(self, vecs): - mean_vec, var_vec = self._split(vecs) - return (self._fisher_mean_factor * mean_vec, - self._fisher_var_factor * var_vec) - - def multiply_fisher_factor_transpose(self, vecs): - mean_vec, var_vec = vecs - return self._concat(self._fisher_mean_factor * mean_vec, - self._fisher_var_factor * var_vec) - - def multiply_fisher_factor_replicated_one_hot(self, index): - assert len(index) == 1, "Length of index was {}".format(len(index)) - index = index[0] - - if index < int(self._mean.shape[-1]): - # Index corresponds to mean parameter. 
- mean_slice = self._fisher_mean_factor[:, index] - mean_slice = array_ops.expand_dims(mean_slice, axis=-1) - mean_output = insert_slice_in_zeros(mean_slice, 1, int( - self._mean.shape[1]), index) - var_output = array_ops.zeros_like(mean_output) - else: - index -= int(self._mean.shape[-1]) - # Index corresponds to variance parameter. - var_slice = self._fisher_var_factor[:, index] - var_slice = array_ops.expand_dims(var_slice, axis=-1) - var_output = insert_slice_in_zeros(var_slice, 1, - int(self._variance.shape[1]), index) - mean_output = array_ops.zeros_like(var_output) - - return mean_output, var_output - - @property - def fisher_factor_inner_shape(self): - return array_ops.concat( - [ - array_ops.shape(self._mean)[:-1], - 2 * array_ops.shape(self._mean)[-1:] - ], - axis=0) - - @property - def fisher_factor_inner_static_shape(self): - shape = self._mean.shape.as_list() - return tensor_shape.TensorShape(shape[-1:] + [2 * shape[-1]]) - - def multiply_hessian(self, vector): - raise NotImplementedError() - - def multiply_hessian_factor(self, vector): - raise NotImplementedError() - - def multiply_hessian_factor_transpose(self, vector): - raise NotImplementedError() - - def multiply_hessian_factor_replicated_one_hot(self, index): - raise NotImplementedError() - - @property - def hessian_factor_inner_shape(self): - raise NotImplementedError() - - @property - def hessian_factor_inner_static_shape(self): - raise NotImplementedError() - - -class CategoricalLogitsNegativeLogProbLoss(DistributionNegativeLogProbLoss, - NaturalParamsNegativeLogProbLoss): - """Neg log prob loss for a categorical distribution parameterized by logits. - - - Note that the Fisher (for a single case) of a categorical distribution, with - respect to the natural parameters (i.e. the logits), is given by: - - F = diag(p) - p*p^T - - where p = softmax(logits). F can be factorized as F = B * B^T where - - B = diag(q) - p*q^T - - where q is the entry-wise square root of p. 
This is easy to verify using the - fact that q^T*q = 1. - """ - - def __init__(self, logits, targets=None, seed=None): - """Instantiates a CategoricalLogitsNegativeLogProbLoss. - - Args: - logits: Tensor of shape [batch_size, output_size]. Parameters for - underlying distribution. - targets: None or Tensor of shape [output_size]. Each elements contains an - index in [0, output_size). - seed: int or None. Default random seed when sampling. - """ - self._logits = logits - self._targets = targets - super(CategoricalLogitsNegativeLogProbLoss, self).__init__(seed=seed) - - @property - def targets(self): - return self._targets - - @property - def dist(self): - return categorical.Categorical(logits=self._logits) - - @property - def _probs(self): - return self.dist.probs - - @property - def _sqrt_probs(self): - return math_ops.sqrt(self._probs) - - @property - def params(self): - return self._logits - - def multiply_fisher(self, vector): - probs = self._probs - return vector * probs - probs * math_ops.reduce_sum( - vector * probs, axis=-1, keepdims=True) - - def multiply_fisher_factor(self, vector): - probs = self._probs - sqrt_probs = self._sqrt_probs - return sqrt_probs * vector - probs * math_ops.reduce_sum( - sqrt_probs * vector, axis=-1, keepdims=True) - - def multiply_fisher_factor_transpose(self, vector): - probs = self._probs - sqrt_probs = self._sqrt_probs - return sqrt_probs * vector - sqrt_probs * math_ops.reduce_sum( - probs * vector, axis=-1, keepdims=True) - - def multiply_fisher_factor_replicated_one_hot(self, index): - assert len(index) == 1, "Length of index was {}".format(len(index)) - probs = self._probs - sqrt_probs = self._sqrt_probs - sqrt_probs_slice = array_ops.expand_dims(sqrt_probs[:, index[0]], -1) - padded_slice = insert_slice_in_zeros(sqrt_probs_slice, 1, - int(sqrt_probs.shape[1]), index[0]) - return padded_slice - probs * sqrt_probs_slice - - @property - def fisher_factor_inner_shape(self): - return array_ops.shape(self._logits) - - @property 
- def fisher_factor_inner_static_shape(self): - return self._logits.shape - - -class MultiBernoulliNegativeLogProbLoss(DistributionNegativeLogProbLoss, - NaturalParamsNegativeLogProbLoss): - """Neg log prob loss for multiple Bernoulli distributions param'd by logits. - - Represents N independent Bernoulli distributions where N = len(logits). Its - Fisher Information matrix is given by, - - F = diag(p * (1-p)) - p = sigmoid(logits) - - As F is diagonal with positive entries, its factor B is, - - B = diag(sqrt(p * (1-p))) - """ - - def __init__(self, logits, targets=None, seed=None): - self._logits = logits - self._targets = targets - super(MultiBernoulliNegativeLogProbLoss, self).__init__(seed=seed) - - @property - def targets(self): - return self._targets - - @property - def dist(self): - return bernoulli.Bernoulli(logits=self._logits) - - @property - def _probs(self): - return self.dist.probs - - @property - def params(self): - return self._logits - - def multiply_fisher(self, vector): - return self._probs * (1 - self._probs) * vector - - def multiply_fisher_factor(self, vector): - return math_ops.sqrt(self._probs * (1 - self._probs)) * vector - - def multiply_fisher_factor_transpose(self, vector): - return self.multiply_fisher_factor(vector) # it's symmetric in this case - - def multiply_fisher_factor_replicated_one_hot(self, index): - assert len(index) == 1, "Length of index was {}".format(len(index)) - probs_slice = array_ops.expand_dims(self._probs[:, index[0]], -1) - output_slice = math_ops.sqrt(probs_slice * (1 - probs_slice)) - return insert_slice_in_zeros(output_slice, 1, int(self._logits.shape[1]), - index[0]) - - @property - def fisher_factor_inner_shape(self): - return array_ops.shape(self._logits) - - @property - def fisher_factor_inner_static_shape(self): - return self._logits.shape - - -def insert_slice_in_zeros(slice_to_insert, dim, dim_size, position): - """Inserts slice into a larger tensor of zeros. 
- - Forms a new tensor which is the same shape as slice_to_insert, except that - the dimension given by 'dim' is expanded to the size given by 'dim_size'. - 'position' determines the position (index) at which to insert the slice within - that dimension. - - Assumes slice_to_insert.shape[dim] = 1. - - Args: - slice_to_insert: The slice to insert. - dim: The dimension which to expand with zeros. - dim_size: The new size of the 'dim' dimension. - position: The position of 'slice_to_insert' in the new tensor. - - Returns: - The new tensor. - - Raises: - ValueError: If the slice's shape at the given dim is not 1. - """ - slice_shape = slice_to_insert.shape - if slice_shape[dim] != 1: - raise ValueError("Expected slice_to_insert.shape to have {} dim of 1, but " - "was {}".format(dim, slice_to_insert.shape[dim])) - - before = [0] * int(len(slice_shape)) - after = before[:] - before[dim] = position - after[dim] = dim_size - position - 1 - - return array_ops.pad(slice_to_insert, list(zip(before, after))) - - -class OnehotCategoricalLogitsNegativeLogProbLoss( - CategoricalLogitsNegativeLogProbLoss): - """Neg log prob loss for a categorical distribution with onehot targets. - - Identical to CategoricalLogitsNegativeLogProbLoss except that the underlying - distribution is OneHotCategorical as opposed to Categorical. - """ - - @property - def dist(self): - return onehot_categorical.OneHotCategorical(logits=self._logits) diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py b/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py deleted file mode 100644 index 4279cb2792..0000000000 --- a/tensorflow/contrib/kfac/python/ops/loss_functions_lib.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Loss functions to be used by LayerCollection.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.kfac.python.ops.loss_functions import * -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import - -_allowed_symbols = [ - "LossFunction", - "NegativeLogProbLoss", - "NaturalParamsNegativeLogProbLoss", - "DistributionNegativeLogProbLoss", - "NormalMeanNegativeLogProbLoss", - "NormalMeanVarianceNegativeLogProbLoss", - "CategoricalLogitsNegativeLogProbLoss", - "OnehotCategoricalLogitsNegativeLogProbLoss", - "MultiBernoulliNegativeLogProbLoss", - "insert_slice_in_zeros", -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/op_queue.py b/tensorflow/contrib/kfac/python/ops/op_queue.py deleted file mode 100644 index b6d9d37a31..0000000000 --- a/tensorflow/contrib/kfac/python/ops/op_queue.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Helper for choosing which op to run next in a distributed setting.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import ops as tf_ops - - -class OpQueue(object): - """Class for choosing which Op to run next. - - Constructs an infinitely repeating sequence of Ops in shuffled order. - - In K-FAC, this can be used to distribute inverse update operations among - workers. - """ - - def __init__(self, ops, seed=None): - """Initializes an OpQueue. - - Args: - ops: list of TensorFlow Ops. Ops to be selected from. All workers must - initialize with the same set of ops. - seed: int or None. Random seed used when shuffling order of ops. - """ - self._ops_by_name = {op.name: op for op in ops} - - # Construct a (shuffled) Dataset with Op names. - op_names = tf_ops.convert_to_tensor(list(sorted(op.name for op in ops))) - op_names_dataset = (dataset_ops.Dataset.from_tensor_slices(op_names) - .shuffle(len(ops), seed=seed).repeat()) - self._next_op_name = op_names_dataset.make_one_shot_iterator().get_next() - - @property - def ops(self): - """Ops this OpQueue can return in next_op().""" - return self._ops_by_name.values() - - def next_op(self, sess): - """Chooses which op to run next. - - Note: This call will make a call to sess.run(). - - Args: - sess: tf.Session. - - Returns: - Next Op chosen from 'ops'. 
- """ - # In Python 3, type(next_op_name) == bytes. Calling bytes.decode('ascii') - # returns a str. - next_op_name = sess.run(self._next_op_name).decode('ascii') - return self._ops_by_name[next_op_name] diff --git a/tensorflow/contrib/kfac/python/ops/op_queue_lib.py b/tensorflow/contrib/kfac/python/ops/op_queue_lib.py deleted file mode 100644 index 09c9a4ab33..0000000000 --- a/tensorflow/contrib/kfac/python/ops/op_queue_lib.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Helper for choosing which op to run next in a distributed setting.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.kfac.python.ops.op_queue import * -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import - -_allowed_symbols = [ - 'OpQueue', -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py deleted file mode 100644 index 38605259b5..0000000000 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ /dev/null @@ -1,727 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""The KFAC optimizer.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import warnings - -# pylint disable=long-line -from tensorflow.contrib.kfac.python.ops import curvature_matrix_vector_products as cmvp -from tensorflow.contrib.kfac.python.ops import estimator as est -# pylint enable=long-line - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables as tf_variables -from tensorflow.python.training import gradient_descent - - -class KfacOptimizer(gradient_descent.GradientDescentOptimizer): - """The KFAC Optimizer (https://arxiv.org/abs/1503.05671).""" - - def __init__(self, - learning_rate, - cov_ema_decay, - damping, - layer_collection, - var_list=None, - momentum=0.9, - momentum_type="regular", - norm_constraint=None, - name="KFAC", - estimation_mode="gradients", - colocate_gradients_with_ops=True, - batch_size=None, - placement_strategy=None, - **kwargs): - """Initializes the KFAC optimizer with the given settings. - - Args: - learning_rate: The base learning rate for the optimizer. Should probably - be set to 1.0 when using momentum_type = 'qmodel', but can still be - set lowered if desired (effectively lowering the trust in the - quadratic model.) - cov_ema_decay: The decay factor used when calculating the covariance - estimate moving averages. 
- damping: The damping factor used to stabilize training due to errors in - the local approximation with the Fisher information matrix, and to - regularize the update direction by making it closer to the gradient. - If damping is adapted during training then this value is used for - initializing damping variable. - (Higher damping means the update looks more like a standard gradient - update - see Tikhonov regularization.) - layer_collection: The layer collection object, which holds the fisher - blocks, Kronecker factors, and losses associated with the - graph. The layer_collection cannot be modified after KfacOptimizer's - initialization. - var_list: Optional list or tuple of variables to train. Defaults to the - list of variables collected in the graph under the key - `GraphKeys.TRAINABLE_VARIABLES`. - momentum: The momentum decay constant to use. Only applies when - momentum_type is 'regular' or 'adam'. (Default: 0.9) - momentum_type: The type of momentum to use in this optimizer, one of - 'regular', 'adam', or 'qmodel'. (Default: 'regular') - norm_constraint: float or Tensor. If specified, the update is scaled down - so that its approximate squared Fisher norm v^T F v is at most the - specified value. May only be used with momentum type 'regular'. - (Default: None) - name: The name for this optimizer. (Default: 'KFAC') - estimation_mode: The type of estimator to use for the Fishers. Can be - 'gradients', 'empirical', 'curvature_propagation', or 'exact'. - (Default: 'gradients'). See the doc-string for FisherEstimator for - more a more detailed description of these options. - colocate_gradients_with_ops: Whether we should request gradients we - compute in the estimator be colocated with their respective ops. - (Default: True) - batch_size: The size of the mini-batch. Only needed when momentum_type - == 'qmodel' or when automatic adjustment is used. 
(Default: None) - placement_strategy: string, Device placement strategy used when creating - covariance variables, covariance ops, and inverse ops. - (Default: `None`) - **kwargs: Arguments to be passed to specific placement - strategy mixin. Check `placement.RoundRobinPlacementMixin` for example. - - Raises: - ValueError: If the momentum type is unsupported. - ValueError: If clipping is used with momentum type other than 'regular'. - ValueError: If no losses have been registered with layer_collection. - ValueError: If momentum is non-zero and momentum_type is not 'regular' - or 'adam'. - """ - warnings.warn( - "third_party.tensorflow.contrib.kfac is deprecated." - "This will be removed on 15-07-2018. Check README for further details.", - DeprecationWarning) - # Parameters to be passed to the Fisher estimator: - self._variables = var_list or tf_variables.trainable_variables - self._cov_ema_decay = cov_ema_decay - self._layers = layer_collection - self._estimation_mode = estimation_mode - self._colocate_gradients_with_ops = colocate_gradients_with_ops - - # The below parameters are required only if damping needs to be adapted. - # These parameters can be set by calling - # set_damping_adaptation_params() explicitly. - self._damping_adaptation_decay = 0.95 - self._damping_adaptation_interval = 5 - # Check section 6.5 KFAC paper. omega(1) = pow(damping decay, interval) - self._omega = ( - self._damping_adaptation_decay**self._damping_adaptation_interval) - self._adapt_damping = False - self._min_damping = 1e-5 - self._prev_train_batch = None - self._is_chief = False - self._loss_fn = None - self._damping_constant = damping - self._damping = None - self._rho = None - self._prev_loss = None - self._q_model_change = None - self._update_damping_op = None - - momentum_type = momentum_type.lower() - legal_momentum_types = ["regular", "adam", "qmodel"] - - if momentum_type not in legal_momentum_types: - raise ValueError("Unsupported momentum type {}. Must be one of {}." 
- .format(momentum_type, legal_momentum_types)) - if momentum_type != "regular" and norm_constraint is not None: - raise ValueError("Update clipping is only supported with momentum " - "type 'regular'.") - if momentum_type not in ["regular", "adam"] and momentum != 0: - raise ValueError("Momentum must be unspecified if using a momentum_type " - "other than 'regular' or 'adam'.") - - # Extra parameters of the optimizer - self._momentum = momentum - self._momentum_type = momentum_type - self._norm_constraint = norm_constraint - self._batch_size = batch_size - self._placement_strategy = placement_strategy - - with variable_scope.variable_scope(name): - self._fisher_est = est.make_fisher_estimator( - placement_strategy=placement_strategy, - variables=self._variables, - cov_ema_decay=self._cov_ema_decay, - damping=self.damping, - layer_collection=self._layers, - exps=(-1,), - estimation_mode=self._estimation_mode, - colocate_gradients_with_ops=self._colocate_gradients_with_ops, - **kwargs) - - super(KfacOptimizer, self).__init__(learning_rate, name=name) - - def set_damping_adaptation_params(self, - is_chief, - prev_train_batch, - loss_fn, - min_damping=1e-5, - damping_adaptation_decay=0.99, - damping_adaptation_interval=5): - """Sets parameters required to adapt damping during training. - - When called, enables damping adaptation according to the Levenberg-Marquardt - style rule described in Section 6.5 of "Optimizing Neural Networks with - Kronecker-factored Approximate Curvature". - - Note that this function creates Tensorflow variables which store a few - scalars and are accessed by the ops which update the damping (as part - of the training op returned by the minimize() method). - - Args: - is_chief: `Boolean`, `True` if the worker is chief. - prev_train_batch: Training data used to minimize loss in the previous - step. This will be used to evaluate loss by calling - `loss_fn(prev_train_batch)`. 
- loss_fn: `function` that takes as input training data tensor and returns - a scalar loss. - min_damping: `float`(Optional), Minimum value the damping parameter - can take. Default value 1e-5. - damping_adaptation_decay: `float`(Optional), The `damping` parameter is - multiplied by the `damping_adaptation_decay` every - `damping_adaptation_interval` number of iterations. Default value 0.99. - damping_adaptation_interval: `int`(Optional), Number of steps in between - updating the `damping` parameter. Default value 5. - - Raises: - ValueError: If `set_damping_adaptation_params` is already called and the - the `adapt_damping` is `True`. - """ - if self._adapt_damping: - raise ValueError("Damping adaptation parameters already set.") - - with variable_scope.variable_scope(self.get_name()): - self._adapt_damping = True - self._is_chief = is_chief - self._prev_train_batch = prev_train_batch - self._loss_fn = loss_fn - self._damping_adaptation_decay = damping_adaptation_decay - self._damping_adaptation_interval = damping_adaptation_interval - self._omega = ( - self._damping_adaptation_decay**self._damping_adaptation_interval) - self._min_damping = min_damping - - self._rho = variable_scope.get_variable( - "rho", shape=(), dtype=dtypes.float32, trainable=False) # LM ratio. 
- self._prev_loss = variable_scope.get_variable( - "prev_loss", shape=(), dtype=dtypes.float32, trainable=False) - self._q_model_change = variable_scope.get_variable( - "q_model_change", shape=(), dtype=dtypes.float32, trainable=False) - self._damping = variable_scope.get_variable( - "damping", initializer=self._damping_constant, trainable=False) - - @property - def variables(self): - return self._fisher_est.variables - - @property - def damping(self): - if self._damping: - return self._damping - else: - return self._damping_constant - - @property - def damping_adaptation_interval(self): - return self._damping_adaptation_interval - - def make_vars_and_create_op_thunks(self): - """Make vars and create op thunks. - - Returns: - cov_update_thunks: List of cov update thunks. Corresponds one-to-one with - the list of factors given by the "factors" property. - inv_update_thunks: List of inv update thunks. Corresponds one-to-one with - the list of factors given by the "factors" property. - """ - scope = self.get_name() + "/" + self._fisher_est.name - return self._fisher_est.make_vars_and_create_op_thunks(scope=scope) - - def create_ops_and_vars_thunks(self): - """Create thunks that make the ops and vars on demand. - - This function returns 4 lists of thunks: cov_variable_thunks, - cov_update_thunks, inv_variable_thunks, and inv_update_thunks. - - The length of each list is the number of factors and the i-th element of - each list corresponds to the i-th factor (given by the "factors" property). - - Note that the execution of these thunks must happen in a certain - partial order. The i-th element of cov_variable_thunks must execute - before the i-th element of cov_update_thunks (and also the i-th element - of inv_update_thunks). Similarly, the i-th element of inv_variable_thunks - must execute before the i-th element of inv_update_thunks. - - TL;DR (oversimplified): Execute the thunks according to the order that - they are returned. 
- - Returns: - cov_variable_thunks: A list of thunks that make the cov variables. - cov_update_thunks: A list of thunks that make the cov update ops. - inv_variable_thunks: A list of thunks that make the inv variables. - inv_update_thunks: A list of thunks that make the inv update ops. - """ - scope = self.get_name() + "/" + self._fisher_est.name - return self._fisher_est.create_ops_and_vars_thunks(scope=scope) - - def minimize(self, *args, **kwargs): - # Should this variable scope encompass everything below? Or will the super- - # class make another copy of the same name scope? - with variable_scope.variable_scope(self.get_name()): - kwargs["var_list"] = kwargs.get("var_list") or self.variables - if set(kwargs["var_list"]) != set(self.variables): - raise ValueError("var_list doesn't match with set of Fisher-estimating " - "variables.") - if self._adapt_damping and self._is_chief: - global_step = kwargs.get("global_step", None) - if not global_step: - raise KeyError("global_step needs to be passed to optimizer.minimize " - "if damping parameter is adapted.") - update_damping_op = self._update_damping(self._prev_train_batch, - global_step) - with ops.control_dependencies([update_damping_op]): - loss = args[0] - loss_assign_op = state_ops.assign(self._prev_loss, loss) - train_op = super(KfacOptimizer, self).minimize(*args, **kwargs) - return control_flow_ops.group(loss_assign_op, train_op) - else: - return super(KfacOptimizer, self).minimize(*args, **kwargs) - - def compute_gradients(self, *args, **kwargs): - # args[1] could be our var_list - if len(args) > 1: - var_list = args[1] - else: - kwargs["var_list"] = kwargs.get("var_list") or self.variables - var_list = kwargs["var_list"] - - if set(var_list) != set(self.variables): - raise ValueError("var_list doesn't match with set of Fisher-estimating " - "variables.") - return super(KfacOptimizer, self).compute_gradients(*args, **kwargs) - - def apply_gradients(self, grads_and_vars, *args, **kwargs): - """Applies 
gradients to variables. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - *args: Additional arguments for super.apply_gradients. - **kwargs: Additional keyword arguments for super.apply_gradients. - - Returns: - An `Operation` that applies the specified gradients. - """ - # In Python 3, grads_and_vars can be a zip() object which can only be - # iterated over once. By converting it to a list, we ensure that it can be - # iterated over more than once. - grads_and_vars = list(grads_and_vars) - - # Compute step. - steps_and_vars = self._compute_update_steps(grads_and_vars) - - # Update trainable variables with this step. - return super(KfacOptimizer, self).apply_gradients(steps_and_vars, *args, - **kwargs) - - def _squared_fisher_norm(self, grads_and_vars, precon_grads_and_vars): - """Computes the squared (approximate) Fisher norm of the updates. - - This is defined as v^T F v, where F is the approximate Fisher matrix - as computed by the estimator, and v = F^{-1} g, where g is the gradient. - This is computed efficiently as v^T g. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - precon_grads_and_vars: List of (preconditioned gradient, variable) pairs. - Must be the result of calling `self._fisher_est.multiply_inverse` - on `grads_and_vars`. - - Returns: - Scalar representing the squared norm. - - Raises: - ValueError: if the two list arguments do not contain the same variables, - in the same order. - """ - for (_, gvar), (_, pgvar) in zip(grads_and_vars, precon_grads_and_vars): - if gvar is not pgvar: - raise ValueError("The variables referenced by the two arguments " - "must match.") - terms = [ - math_ops.reduce_sum(grad * pgrad) - for (grad, _), (pgrad, _) in zip(grads_and_vars, precon_grads_and_vars) - ] - return math_ops.reduce_sum(terms) - - def _update_clip_coeff(self, grads_and_vars, precon_grads_and_vars): - """Computes the scale factor for the update to satisfy the norm constraint. 
- - Defined as min(1, sqrt(c / r^T F r)), where c is the norm constraint, - F is the approximate Fisher matrix, and r is the update vector, i.e. - -alpha * v, where alpha is the learning rate, and v is the preconditioned - gradient. - - This is based on Section 5 of Ba et al., Distributed Second-Order - Optimization using Kronecker-Factored Approximations. Note that they - absorb the learning rate alpha (which they denote eta_max) into the formula - for the coefficient, while in our implementation, the rescaling is done - before multiplying by alpha. Hence, our formula differs from theirs by a - factor of alpha. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - precon_grads_and_vars: List of (preconditioned gradient, variable) pairs. - Must be the result of calling `self._fisher_est.multiply_inverse` - on `grads_and_vars`. - - Returns: - Scalar representing the coefficient which should be applied to the - preconditioned gradients to satisfy the norm constraint. - """ - sq_norm_grad = self._squared_fisher_norm(grads_and_vars, - precon_grads_and_vars) - sq_norm_up = sq_norm_grad * self._learning_rate**2 - return math_ops.minimum(1., - math_ops.sqrt(self._norm_constraint / sq_norm_up)) - - def _clip_updates(self, grads_and_vars, precon_grads_and_vars): - """Rescales the preconditioned gradients to satisfy the norm constraint. - - Rescales the preconditioned gradients such that the resulting update r - (after multiplying by the learning rate) will satisfy the norm constraint. - This constraint is that r^T F r <= C, where F is the approximate Fisher - matrix, and C is the norm_constraint attribute. See Section 5 of - Ba et al., Distributed Second-Order Optimization using Kronecker-Factored - Approximations. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - precon_grads_and_vars: List of (preconditioned gradient, variable) pairs. - Must be the result of calling `self._fisher_est.multiply_inverse` - on `grads_and_vars`. 
- - Returns: - List of (rescaled preconditioned gradient, variable) pairs. - """ - coeff = self._update_clip_coeff(grads_and_vars, precon_grads_and_vars) - return [(pgrad * coeff, var) for pgrad, var in precon_grads_and_vars] - - def _compute_prev_updates(self, variables): - """Computes previous updates as negative velocities scaled by learning rate. - - Args: - variables: List of variables in the graph that the update will be - applied to. - - Returns: - List of previous updates applied to the `variables`. - """ - return list( - -1 * self._learning_rate * self._zeros_slot(var, "velocity", self._name) - for var in variables) - - def _compute_qmodel_hyperparams(self, precon_grads, prev_updates, grads, - variables): - """Compute optimal update hyperparameters from the quadratic model. - - More specifically, if L is the loss we minimize a quadratic approximation - of L(theta + d) which we denote by qmodel(d) with - d = alpha*precon_grad + mu*prev_update with respect to alpha and mu, where - - qmodel(d) = (1/2) * d^T * B * d + grad^T*d + L(theta) . - - Unlike in the KL clipping approach we use the non-approximated quadratic - model where the curvature matrix C is the true Fisher on the current - mini-batch (computed without any approximations beyond mini-batch sampling), - with the usual Tikhonov damping/regularization applied, - - C = F + damping * I - - See Section 7 of https://arxiv.org/abs/1503.05671 for a derivation of - the formula. See Appendix C for a discussion of the trick of using - a factorized Fisher matrix to more efficiently compute the required - vector-matrix-vector products. - - Note that the elements of all 4 lists passed to this function must - be in correspondence with each other. - - Args: - precon_grads: List of preconditioned gradients. - prev_updates: List of updates computed at the previous iteration. - grads: List of gradients. - variables: List of variables in the graph that the update will be - applied to. 
(Note that this function doesn't actually apply the - update.) - - Returns: - (alpha, mu, qmodel_change), where alpha and mu are chosen to optimize the - quadratic model, and - qmodel_change = qmodel(alpha*precon_grad + mu*prev_update) - qmodel(0) - = qmodel(alpha*precon_grad + mu*prev_update) - L(theta). - """ - - cmvpc = cmvp.CurvatureMatrixVectorProductComputer(self._layers.losses, - variables) - - # compute the matrix-vector products with the transposed Fisher factor - fft_precon_grads = cmvpc.multiply_fisher_factor_transpose(precon_grads) - fft_prev_updates = cmvpc.multiply_fisher_factor_transpose(prev_updates) - batch_size = math_ops.cast( - self._batch_size, dtype=fft_precon_grads[0].dtype) - - # compute the entries of the 2x2 matrix - m_11 = ( - _inner_product_list(fft_precon_grads, fft_precon_grads) / batch_size + - self.damping * _inner_product_list(precon_grads, precon_grads)) - - m_21 = ( - _inner_product_list(fft_prev_updates, fft_precon_grads) / batch_size + - self.damping * _inner_product_list(prev_updates, precon_grads)) - - m_22 = ( - _inner_product_list(fft_prev_updates, fft_prev_updates) / batch_size + - self.damping * _inner_product_list(prev_updates, prev_updates)) - - def non_zero_prevupd_case(): - r"""Computes optimal (alpha, mu) given non-zero previous update. - - We solve the full 2x2 linear system. See Martens & Grosse (2015), - Section 7, definition of $\alpha^*$ and $\mu^*$. - - Returns: - (alpha, mu, qmodel_change), where alpha and mu are chosen to optimize - the quadratic model, and - qmodel_change = qmodel(alpha*precon_grad + mu*prev_update) - qmodel(0). - """ - m = ops.convert_to_tensor([[m_11, m_21], [m_21, m_22]]) - - c = ops.convert_to_tensor([[_inner_product_list(grads, precon_grads)], - [_inner_product_list(grads, prev_updates)]]) - - sol = -1. 
* _two_by_two_solve(m, c) - alpha = sol[0] - mu = sol[1] - qmodel_change = 0.5 * math_ops.reduce_sum(sol * c) - - return alpha, mu, qmodel_change - - def zero_prevupd_case(): - r"""Computes optimal (alpha, mu) given all-zero previous update. - - The linear system reduces to 1x1. See Martens & Grosse (2015), - Section 6.4, definition of $\alpha^*$. - - Returns: - (alpha, 0.0, qmodel_change), where alpha is chosen to optimize the - quadratic model, and - qmodel_change = qmodel(alpha*precon_grad) - qmodel(0) - """ - m = m_11 - c = _inner_product_list(grads, precon_grads) - - alpha = -c / m - mu = 0.0 - qmodel_change = 0.5 * alpha * c - - return alpha, mu, qmodel_change - - return control_flow_ops.cond( - math_ops.equal(m_22, 0.0), zero_prevupd_case, non_zero_prevupd_case) - - def _assign_q_model_change(self, q_model_change): - """Assigns `q_model_change` to `self._q_model_change` if damping is adapted. - - Note only the chief worker does the assignment. - - Args: - q_model_change: Scalar tensor of type `float32`. - - Returns: - If `adapt_damping` is `True` then returns an assign op, Otherwise returns - a no_op(). - """ - if self._adapt_damping and self._is_chief: - q_model_assign_op = state_ops.assign(self._q_model_change, q_model_change) - else: - q_model_assign_op = control_flow_ops.no_op() - return q_model_assign_op - - def _compute_qmodel_hyperparams_wrapper(self, grads_and_vars, - precon_grads_and_vars): - """Wrapper function for `self._compute_qmodel_hyperparams`. - - Constructs a list of preconditioned gradients and variables. Also creates a - op to assign the computed q model change to `self._q_model_change`. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - precon_grads_and_vars: List of (preconditioned gradients, variable) - pairs. - - Returns: - (alpha, mu, q_model_assign_op), where alpha and mu are chosen to optimize - the quadratic model, `q_model_assign_op` assigns the computed q model - change to `self._q_model_change`. 
- """ - precon_grads = list( - precon_grad for (precon_grad, _) in precon_grads_and_vars) - grads = list(grad for (grad, _) in grads_and_vars) - variables = list(var for (_, var) in grads_and_vars) - prev_updates = self._compute_prev_updates(variables) - # Compute optimal velocity update parameters according to quadratic model - alpha, mu, q_model_change = self._compute_qmodel_hyperparams( - precon_grads, prev_updates, grads, variables) - - return alpha, mu, self._assign_q_model_change(q_model_change) - - def _compute_update_steps(self, grads_and_vars): - """Computes the update steps for the variables given the gradients. - - Args: - grads_and_vars: List of (gradient, variable) pairs. - - Returns: - A list of tuple (assign_op ,var) where `assign_op` assigns the update - steps to `var`. - """ - - if self._momentum_type == "regular": - # Compute "preconditioned" gradient. - precon_grads_and_vars = self._fisher_est.multiply_inverse(grads_and_vars) - - # Apply "KL clipping" if asked for. - if self._norm_constraint is not None: - precon_grads_and_vars = self._clip_updates(grads_and_vars, - precon_grads_and_vars) - - # Update the velocity with this and return it as the step. - if self._adapt_damping and self._is_chief: - _, _, q_model_assign_op = self._compute_qmodel_hyperparams_wrapper( - grads_and_vars, precon_grads_and_vars) - with ops.control_dependencies([q_model_assign_op]): - return self._update_velocities(precon_grads_and_vars, self._momentum) - else: - return self._update_velocities(precon_grads_and_vars, self._momentum) - elif self._momentum_type == "adam": - # Update velocity. - velocities_and_vars = self._update_velocities(grads_and_vars, - self._momentum) - # Return "preconditioned" velocity vector as the step. - return self._fisher_est.multiply_inverse(velocities_and_vars) - - elif self._momentum_type == "qmodel": - # Compute "preconditioned" gradient. 
- precon_grads_and_vars = self._fisher_est.multiply_inverse(grads_and_vars) - - # Compute optimal velocity update parameters according to quadratic model - alpha, mu, q_model_assign_op = self._compute_qmodel_hyperparams_wrapper( - grads_and_vars, precon_grads_and_vars) - - with ops.control_dependencies([q_model_assign_op]): - return self._update_velocities( - precon_grads_and_vars, mu, vec_coeff=-alpha) - - def _update_velocities(self, vecs_and_vars, decay, vec_coeff=1.0): - """Updates the velocities of the variables with the given vectors. - - Args: - vecs_and_vars: List of (vector, variable) pairs. - decay: How much to decay the old velocity by. This is often referred to - as the 'momentum constant'. - vec_coeff: Coefficient to apply to the vectors before adding them to the - velocity. - - Returns: - A list of (velocity, var) indicating the new velocity for each var. - """ - - def _update_velocity(vec, var): - velocity = self._zeros_slot(var, "velocity", self._name) - with ops.colocate_with(velocity): - # NOTE(mattjj): read/modify/write race condition not suitable for async. - - # Compute the new velocity for this variable. - new_velocity = decay * velocity + vec_coeff * vec - - # Save the updated velocity. - return (array_ops.identity(velocity.assign(new_velocity)), var) - - # Go through variable and update its associated part of the velocity vector. - return [_update_velocity(vec, var) for vec, var in vecs_and_vars] - - def _update_damping(self, prev_batch, global_step): - """Adapts damping parameter. Check KFAC (Section 6.5) for the details. - - The damping parameter is updated according to the Levenberg-Marquardt rule - every `self._damping_adaptation_interval` iterations. - - Args: - prev_batch: Tensor or tuple of tensors which can be passed to - `self._loss_fn` to evaluate loss. - global_step: `Variable` which keeps track of number of times the training - variables have been updated. - Returns: - A `tf.cond` op which updates the damping parameter. 
- """ - def compute_damping(): - """"Adapts damping parameter based on "reduction ratio". - - Reduction ratio captures how closely the quadratic approximation to the - loss function approximates the actual loss within a trust region. The - damping update tries to make the damping as small as possible while - maintaining the property that the quadratic model remains a good local - approximation to the loss function. - - Returns: - An Op to assign newly computed damping value to `self._damping`. - """ - prev_batch_loss = self._loss_fn(prev_batch) - with ops.control_dependencies([prev_batch_loss]): - rho_assign = self._rho.assign( - (prev_batch_loss - self._prev_loss) / self._q_model_change) - with ops.control_dependencies([rho_assign]): - new_damping = control_flow_ops.case( - [(self._rho < 0.25, lambda: self.damping / self._omega), - (self._rho > 0.75, lambda: self.damping * self._omega)], - lambda: self.damping) - with ops.control_dependencies([new_damping]): - new_damping_min = math_ops.maximum(new_damping, self._min_damping) - return control_flow_ops.group(self._damping.assign(new_damping_min)) - - return control_flow_ops.cond( - math_ops.equal( - math_ops.mod(global_step + 1, self._damping_adaptation_interval), - 0), compute_damping, control_flow_ops.no_op) - - -def _inner_product_list(list1, list2): - return math_ops.add_n( - [math_ops.reduce_sum(elt1 * elt2) for elt1, elt2 in zip(list1, list2)]) - - -def _two_by_two_solve(m, c): - # it might be better just to crank out the exact formula for 2x2 inverses - return math_ops.matmul(linalg_ops.matrix_inverse(m), c) diff --git a/tensorflow/contrib/kfac/python/ops/optimizer_lib.py b/tensorflow/contrib/kfac/python/ops/optimizer_lib.py deleted file mode 100644 index 87d1866e06..0000000000 --- a/tensorflow/contrib/kfac/python/ops/optimizer_lib.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""The KFAC optimizer.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.kfac.python.ops.optimizer import * -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import - -_allowed_symbols = [ - "KfacOptimizer", -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/placement.py b/tensorflow/contrib/kfac/python/ops/placement.py deleted file mode 100644 index c4454325ae..0000000000 --- a/tensorflow/contrib/kfac/python/ops/placement.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Implements placement strategies for cov and inv ops, cov variables.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import itertools - -from tensorflow.python.framework import ops as tf_ops - - -def _make_thunk_on_device(func, device): - def thunk(): - with tf_ops.device(device): - return func() - return thunk - - -class RoundRobinPlacementMixin(object): - """Implements round robin placement strategy for ops and variables.""" - - def __init__(self, cov_devices=None, inv_devices=None, **kwargs): - """Initializes the RoundRobinPlacementMixin class. - - Args: - cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance - computations will be placed on these devices in a round-robin fashion. - Can be None, which means that no devices are specified. - inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion - computations will be placed on these devices in a round-robin fashion. - Can be None, which means that no devices are specified. - **kwargs: Need something here? - - """ - super(RoundRobinPlacementMixin, self).__init__(**kwargs) - self._cov_devices = cov_devices - self._inv_devices = inv_devices - - def make_vars_and_create_op_thunks(self, scope=None): - """Make vars and create op thunks w/ a round-robin device placement start. - - For each factor, all of that factor's cov variables and their associated - update ops will be placed on a particular device. A new device is chosen - for each factor by cycling through list of devices in the - `self._cov_devices` attribute. If `self._cov_devices` is `Non`e then no - explicit device placement occurs. - - An analogous strategy is followed for inverse update ops, with the list of - devices being given by the `self._inv_devices` attribute. 
- - Inverse variables on the other hand are not placed on any specific device - (they will just use the current the device placement context, whatever - that happens to be). The idea is that the inverse variable belong where - they will be accessed most often, which is the device that actually applies - the preconditioner to the gradient. The user will be responsible for setting - the device context for this. - - Args: - scope: A string or None. If None it will be set to the name of this - estimator (given by the name property). All variables will be created, - and all thunks will execute, inside of a variable scope of the given - name. (Default: None) - - Returns: - cov_update_thunks: List of cov update thunks. Corresponds one-to-one with - the list of factors given by the "factors" property. - inv_update_thunks: List of inv update thunks. Corresponds one-to-one with - the list of factors given by the "factors" property. - """ - # Note: `create_ops_and_vars_thunks` is implemented in `FisherEstimator`. 
- (cov_variable_thunks_raw, cov_update_thunks_raw, inv_variable_thunks_raw, - inv_update_thunks_raw) = self.create_ops_and_vars_thunks(scope=scope) - - if self._cov_devices: - cov_update_thunks = [] - for cov_variable_thunk, cov_update_thunk, device in zip( - cov_variable_thunks_raw, cov_update_thunks_raw, - itertools.cycle(self._cov_devices)): - with tf_ops.device(device): - cov_variable_thunk() - cov_update_thunks.append(_make_thunk_on_device(cov_update_thunk, - device)) - else: - for cov_variable_thunk in cov_variable_thunks_raw: - cov_variable_thunk() - cov_update_thunks = cov_update_thunks_raw - - for inv_variable_thunk in inv_variable_thunks_raw: - inv_variable_thunk() - - if self._inv_devices: - inv_update_thunks = [] - for inv_update_thunk, device in zip(inv_update_thunks_raw, - itertools.cycle(self._inv_devices)): - inv_update_thunks.append(_make_thunk_on_device(inv_update_thunk, - device)) - else: - inv_update_thunks = inv_update_thunks_raw - - return cov_update_thunks, inv_update_thunks diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py deleted file mode 100644 index 144295f4c7..0000000000 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ /dev/null @@ -1,709 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Utility functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.tpu.python.ops import tpu_ops -from tensorflow.contrib.tpu.python.tpu import tpu_function -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gradients_impl -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import variables - -# Method used for inverting matrices. -POSDEF_INV_METHOD = "cholesky" -POSDEF_EIG_METHOD = "self_adjoint" - - -def set_global_constants(posdef_inv_method=None): - """Sets various global constants used by the classes in this module.""" - global POSDEF_INV_METHOD - - if posdef_inv_method is not None: - POSDEF_INV_METHOD = posdef_inv_method - - -class SequenceDict(object): - """A dict convenience wrapper that allows getting/setting with sequences.""" - - def __init__(self, iterable=None): - self._dict = dict(iterable or []) - - def __getitem__(self, key_or_keys): - if isinstance(key_or_keys, (tuple, list)): - return list(map(self.__getitem__, key_or_keys)) - else: - return self._dict[key_or_keys] - - def __setitem__(self, key_or_keys, val_or_vals): - if isinstance(key_or_keys, (tuple, list)): - for key, value in zip(key_or_keys, val_or_vals): - self[key] = value - else: - self._dict[key_or_keys] = val_or_vals - - def items(self): - return list(self._dict.items()) - - -def tensors_to_column(tensors): - """Converts a tensor or list of tensors to a 
column vector. - - Args: - tensors: A tensor or list of tensors. - - Returns: - The tensors reshaped into vectors and stacked on top of each other. - """ - if isinstance(tensors, (tuple, list)): - return array_ops.concat( - tuple(array_ops.reshape(tensor, [-1, 1]) for tensor in tensors), axis=0) - else: - return array_ops.reshape(tensors, [-1, 1]) - - -def column_to_tensors(tensors_template, colvec): - """Converts a column vector back to the shape of the given template. - - Args: - tensors_template: A tensor or list of tensors. - colvec: A 2d column vector with the same shape as the value of - tensors_to_column(tensors_template). - - Returns: - X, where X is tensor or list of tensors with the properties: - 1) tensors_to_column(X) = colvec - 2) X (or its elements) have the same shape as tensors_template (or its - elements) - """ - if isinstance(tensors_template, (tuple, list)): - offset = 0 - tensors = [] - for tensor_template in tensors_template: - sz = np.prod(tensor_template.shape.as_list(), dtype=np.int32) - tensor = array_ops.reshape(colvec[offset:(offset + sz)], - tensor_template.shape) - tensors.append(tensor) - offset += sz - - tensors = tuple(tensors) - else: - tensors = array_ops.reshape(colvec, tensors_template.shape) - - return tensors - - -def kronecker_product(mat1, mat2): - """Computes the Kronecker product two matrices.""" - m1, n1 = mat1.get_shape().as_list() - mat1_rsh = array_ops.reshape(mat1, [m1, 1, n1, 1]) - m2, n2 = mat2.get_shape().as_list() - mat2_rsh = array_ops.reshape(mat2, [1, m2, 1, n2]) - return array_ops.reshape(mat1_rsh * mat2_rsh, [m1 * m2, n1 * n2]) - - -def layer_params_to_mat2d(vector): - """Converts a vector shaped like layer parameters to a 2D matrix. - - In particular, we reshape the weights/filter component of the vector to be - 2D, flattening all leading (input) dimensions. If there is a bias component, - we concatenate it to the reshaped weights/filter component. 
- - Args: - vector: A Tensor or pair of Tensors shaped like layer parameters. - - Returns: - A 2D Tensor with the same coefficients and the same output dimension. - """ - if isinstance(vector, (tuple, list)): - w_part, b_part = vector - w_part_reshaped = array_ops.reshape(w_part, - [-1, w_part.shape.as_list()[-1]]) - return array_ops.concat( - (w_part_reshaped, array_ops.reshape(b_part, [1, -1])), axis=0) - elif isinstance(vector, ops.IndexedSlices): - return vector - else: # Tensor or Tensor-like. - return array_ops.reshape(vector, [-1, vector.shape.as_list()[-1]]) - - -def mat2d_to_layer_params(vector_template, mat2d): - """Converts a canonical 2D matrix representation back to a vector. - - Args: - vector_template: A Tensor or pair of Tensors shaped like layer parameters. - mat2d: A 2D Tensor with the same shape as the value of - layer_params_to_mat2d(vector_template). - - Returns: - A Tensor or pair of Tensors with the same coefficients as mat2d and the same - shape as vector_template. 
- """ - if isinstance(vector_template, (tuple, list)): - w_part, b_part = mat2d[:-1], mat2d[-1] - return array_ops.reshape(w_part, vector_template[0].shape), b_part - elif isinstance(vector_template, ops.IndexedSlices): - if not isinstance(mat2d, ops.IndexedSlices): - raise TypeError( - "If vector_template is an IndexedSlices, so should mat2d.") - return mat2d - else: - return array_ops.reshape(mat2d, vector_template.shape) - - -def posdef_inv(tensor, damping): - """Computes the inverse of tensor + damping * identity.""" - identity = linalg_ops.eye(tensor.shape.as_list()[0], dtype=tensor.dtype) - damping = math_ops.cast(damping, dtype=tensor.dtype) - return posdef_inv_functions[POSDEF_INV_METHOD](tensor, identity, damping) - - -def posdef_inv_matrix_inverse(tensor, identity, damping): - """Computes inverse(tensor + damping * identity) directly.""" - return linalg_ops.matrix_inverse(tensor + damping * identity) - - -def posdef_inv_cholesky(tensor, identity, damping): - """Computes inverse(tensor + damping * identity) with Cholesky.""" - chol = linalg_ops.cholesky(tensor + damping * identity) - return linalg_ops.cholesky_solve(chol, identity) - - -def posdef_inv_eig(tensor, identity, damping): - """Computes inverse(tensor + damping * identity) with eigendecomposition.""" - eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig( - tensor + damping * identity) - return math_ops.matmul( - eigenvectors / eigenvalues, eigenvectors, transpose_b=True) - - -posdef_inv_functions = { - "matrix_inverse": posdef_inv_matrix_inverse, - "cholesky": posdef_inv_cholesky, - "eig": posdef_inv_eig, -} - - -def posdef_eig(mat): - """Computes the eigendecomposition of a positive semidefinite matrix.""" - return posdef_eig_functions[POSDEF_EIG_METHOD](mat) - - -def posdef_eig_svd(mat): - """Computes the singular values and left singular vectors of a matrix.""" - evals, evecs, _ = linalg_ops.svd(mat) - - return evals, evecs - - -def posdef_eig_self_adjoint(mat): - """Computes 
eigendecomposition using self_adjoint_eig.""" - evals, evecs = linalg_ops.self_adjoint_eig(mat) - evals = math_ops.abs(evals) # Should be equivalent to svd approach. - - return evals, evecs - - -posdef_eig_functions = { - "self_adjoint": posdef_eig_self_adjoint, - "svd": posdef_eig_svd, -} - - -def cholesky(tensor, damping): - """Computes the inverse of tensor + damping * identity.""" - identity = linalg_ops.eye(tensor.shape.as_list()[0], dtype=tensor.dtype) - damping = math_ops.cast(damping, dtype=tensor.dtype) - return linalg_ops.cholesky(tensor + damping * identity) - - -class SubGraph(object): - """Defines a subgraph given by all the dependencies of a given set of outputs. - """ - - def __init__(self, outputs): - # Set of all ancestor Tensors, Ops to 'outputs'. - self._members = set() - - self._iter_add(outputs) - - def _iter_add(self, root): - """Iteratively adds all of nodes' ancestors using depth first search.""" - stack = [root] - while stack: - nodes = stack.pop() - for node in nodes: - if node in self._members: - continue - self._members.add(node) - - if isinstance(node, ops.Tensor): - stack.append((node.op,)) - elif isinstance(node, ops.Operation): - stack.append(node.inputs) - - def is_member(self, node): - """Check if 'node' is in this subgraph.""" - return node in self._members - - def variable_uses(self, var): - """Computes number of times a variable is used. - - Args: - var: Variable or ResourceVariable instance. - - Returns: - Number of times a variable is used within this subgraph. - - Raises: - ValueError: If 'var' is not a variable type. - """ - if isinstance(var, resource_variable_ops.ResourceVariable): - var = var.handle - elif isinstance(var, variables.Variable): - var = var.value() - else: - raise ValueError("%s does not appear to be a variable." 
% str(var)) - - return len(self._members.intersection(set(var.consumers()))) - - def filter_list(self, node_list): - """Filters 'node_list' to nodes in this subgraph.""" - filtered_list = [] - for node in node_list: - if self.is_member(node): - filtered_list.append(node) - return filtered_list - - -def generate_random_signs(shape, dtype=dtypes.float32): - """Generate a random tensor with {-1, +1} entries.""" - ints = random_ops.random_uniform(shape, maxval=2, dtype=dtypes.int32) - return 2 * math_ops.cast(ints, dtype=dtype) - 1 - - -def fwd_gradients(ys, xs, grad_xs=None, stop_gradients=None): - """Compute forward-mode gradients.""" - # See b/37888268. - - # This version of forward-mode autodiff is based on code by Tim Cooijmans - # and handles list arguments and certain special cases such as when the - # ys doesn't depend on one or more of the xs, and when ops.IndexedSlices are - # generated by the first gradients_impl.gradients call. - - us = [array_ops.zeros_like(y) + float("nan") for y in ys] - dydxs = gradients_impl.gradients( - ys, xs, grad_ys=us, stop_gradients=stop_gradients) - - # Deal with strange types that gradients_impl.gradients returns but can't - # deal with. - dydxs = [ - ops.convert_to_tensor(dydx) - if isinstance(dydx, ops.IndexedSlices) else dydx for dydx in dydxs - ] - dydxs = [ - array_ops.zeros_like(x) if dydx is None else dydx - for x, dydx in zip(xs, dydxs) - ] - - dysdx = gradients_impl.gradients(dydxs, us, grad_ys=grad_xs) - - return dysdx - - -def on_tpu(): - """Returns True when building a TPU computation.""" - return tpu_function.get_tpu_context().number_of_shards is not None - - -def cross_replica_mean(tensor, name=None): - """Takes mean value of a Tensor across all TPU cores. - - Args: - tensor: Tensor to be synchronized. - name: None or string. Name of Op. - - Returns: - Average of Tensor across all TPU cores. - - Raises: - ValueError: If called outside of TPU context. 
- """ - with ops.name_scope(name, "cross_replica_mean", [tensor]): - num_shards = tpu_function.get_tpu_context().number_of_shards - if num_shards is None: - raise ValueError( - "Cannot take cross_replica_mean() outside of TPU Context.") - if num_shards == 1: - return tensor - return tpu_ops.cross_replica_sum(tensor / num_shards) - - -def ensure_sequence(obj): - """If `obj` isn't a tuple or list, return a tuple containing `obj`.""" - if isinstance(obj, (tuple, list)): - return obj - else: - return (obj,) - - -def batch_execute(global_step, thunks, batch_size, name=None): - """Executes a subset of ops per global step. - - Given a list of thunks, each of which produces a single stateful op, - ensures that exactly 'batch_size' ops are run per global step. Ops are - scheduled in a round-robin fashion. For example, with 3 ops - - global_step | op0 | op1 | op2 - ------------+-----+-----+----- - 0 | x | x | - ------------+-----+-----+----- - 1 | x | | x - ------------+-----+-----+----- - 2 | | x | x - ------------+-----+-----+----- - 3 | x | x | - ------------+-----+-----+----- - 4 | x | | x - - Does not guarantee order of op execution within a single global step. - - Args: - global_step: Tensor indicating time. Determines which ops run. - thunks: List of thunks. Each thunk encapsulates one op. Return values are - ignored. - batch_size: int. Number of ops to execute per global_step. - name: string or None. Name scope for newly added ops. - - Returns: - List of ops. Exactly 'batch_size' ops are guaranteed to have an effect - every global step. 
- """ - - def true_fn(thunk): - """Ensures thunk is executed and returns an Op (not a Tensor).""" - - def result(): - with ops.control_dependencies([thunk()]): - return control_flow_ops.no_op() - - return result - - def false_fn(_): - """Executes a no-op.""" - - def result(): - return control_flow_ops.no_op() - - return result - - with ops.name_scope(name, "batch_execute"): - true_fns = [true_fn(thunk) for thunk in thunks] - false_fns = [false_fn(thunk) for thunk in thunks] - num_thunks = len(thunks) - conditions = [ - math_ops.less( - math_ops.mod(batch_size - 1 + global_step * batch_size - j, - num_thunks), batch_size) for j in range(num_thunks) - ] - result = [ - control_flow_ops.cond(condition, true_fn, false_fn) - for (condition, true_fn, - false_fn) in zip(conditions, true_fns, false_fns) - ] - return result - - -def extract_convolution_patches(inputs, - filter_shape, - padding, - strides=None, - dilation_rate=None, - name=None, - data_format=None): - """Extracts inputs to each output coordinate in tf.nn.convolution. - - This is a generalization of tf.extract_image_patches() to tf.nn.convolution(), - where the number of spatial dimensions may be something other than 2. - - Assumes, - - First dimension of inputs is batch_size - - Convolution filter is applied to all input channels. - - Args: - inputs: Tensor of shape [batch_size, ..spatial_image_shape.., - ..spatial_filter_shape.., in_channels]. Inputs to tf.nn.convolution(). - filter_shape: List of ints. Shape of filter passed to tf.nn.convolution(). - padding: string. Padding method. One of "VALID", "SAME". - strides: None or list of ints. Strides along spatial dimensions. - dilation_rate: None or list of ints. Dilation along spatial dimensions. - name: None or str. Name of Op. - data_format: None or str. Format of data. - - Returns: - Tensor of shape [batch_size, ..spatial_image_shape.., - ..spatial_filter_shape.., in_channels] - - Raises: - ValueError: If data_format does not put channel last. 
- ValueError: If inputs and filter disagree on in_channels. - """ - if not is_data_format_channel_last(data_format): - raise ValueError("Channel must be last dimension.") - with ops.name_scope(name, "extract_convolution_patches", - [inputs, filter_shape, padding, strides, dilation_rate]): - batch_size = inputs.shape.as_list()[0] - in_channels = inputs.shape.as_list()[-1] - - # filter_shape = spatial_filter_shape + [in_channels, out_channels] - spatial_filter_shape = filter_shape[:-2] - if in_channels != filter_shape[-2]: - raise ValueError("inputs and filter_shape must agree on in_channels.") - - # Map each input feature to a location in the output. - out_channels = np.prod(spatial_filter_shape) * in_channels - filters = linalg_ops.eye(out_channels) - filters = array_ops.reshape( - filters, - list(spatial_filter_shape) + [in_channels, out_channels]) - - result = nn_ops.convolution( - inputs, - filters, - padding=padding, - strides=strides, - dilation_rate=dilation_rate) - spatial_output_shape = result.shape.as_list()[1:-1] - result = array_ops.reshape(result, - [batch_size or -1] + spatial_output_shape + - list(spatial_filter_shape) + [in_channels]) - - return result - - -def extract_pointwise_conv2d_patches(inputs, - filter_shape, - name=None, - data_format=None): - """Extract patches for a 1x1 conv2d. - - Args: - inputs: 4-D Tensor of shape [batch_size, height, width, in_channels]. - filter_shape: List of 4 ints. Shape of filter to apply with conv2d() - name: None or str. Name for Op. - data_format: None or str. Format for data. See 'data_format' in - tf.nn.conv2d() for details. - - Returns: - Tensor of shape [batch_size, ..spatial_input_shape.., - ..spatial_filter_shape.., in_channels] - - Raises: - ValueError: if inputs is not 4-D. - ValueError: if filter_shape is not [1, 1, ?, ?] - ValueError: if data_format is not channels-last. 
- """ - if inputs.shape.ndims != 4: - raise ValueError("inputs must have 4 dims.") - if len(filter_shape) != 4: - raise ValueError("filter_shape must have 4 dims.") - if filter_shape[0] != 1 or filter_shape[1] != 1: - raise ValueError("filter_shape must have shape 1 along spatial dimensions.") - if not is_data_format_channel_last(data_format): - raise ValueError("data_format must be channels last.") - with ops.name_scope(name, "extract_pointwise_conv2d_patches", - [inputs, filter_shape]): - ksizes = [1, 1, 1, 1] # Spatial shape is 1x1. - strides = [1, 1, 1, 1] # Operate on all pixels. - rates = [1, 1, 1, 1] # Dilation has no meaning with spatial shape = 1. - padding = "VALID" # Doesn't matter. - result = array_ops.extract_image_patches(inputs, ksizes, strides, rates, - padding) - - batch_size, input_height, input_width, in_channels = inputs.shape.as_list() - filter_height, filter_width, in_channels, _ = filter_shape - return array_ops.reshape(result, [ - batch_size, input_height, input_width, filter_height, filter_width, - in_channels - ]) - - -def is_data_format_channel_last(data_format): - """True if data_format puts channel last.""" - if data_format is None: - return True - return data_format.endswith("C") - - -def matmul_sparse_dense(A, B, name=None, transpose_a=False, transpose_b=False): # pylint: disable=invalid-name - """Computes matmul(A, B) where A is sparse, B is dense. - - Args: - A: tf.IndexedSlices with dense shape [m, n]. - B: tf.Tensor with shape [n, k]. - name: str. Name of op. - transpose_a: Bool. If true we transpose A before multiplying it by B. - (Default: False) - transpose_b: Bool. If true we transpose B before multiplying it by A. - (Default: False) - - Returns: - tf.IndexedSlices resulting from matmul(A, B). - - Raises: - ValueError: If A doesn't represent a matrix. - ValueError: If B is not rank-2. 
- """ - with ops.name_scope(name, "matmul_sparse_dense", [A, B]): - if A.indices.shape.ndims != 1 or A.values.shape.ndims != 2: - raise ValueError("A must represent a matrix. Found: %s." % A) - if B.shape.ndims != 2: - raise ValueError("B must be a matrix.") - new_values = math_ops.matmul( - A.values, B, transpose_a=transpose_a, transpose_b=transpose_b) - return ops.IndexedSlices( - new_values, - A.indices, - dense_shape=array_ops.stack([A.dense_shape[0], new_values.shape[1]])) - - -def matmul_diag_sparse(A_diag, B, name=None): # pylint: disable=invalid-name - """Computes matmul(A, B) where A is a diagonal matrix, B is sparse. - - Args: - A_diag: diagonal entries of matrix A of shape [m, m]. - B: tf.IndexedSlices. Represents matrix of shape [m, n]. - name: str. Name of op. - - Returns: - tf.IndexedSlices resulting from matmul(A, B). - - Raises: - ValueError: If A_diag is not rank-1. - ValueError: If B doesn't represent a matrix. - """ - with ops.name_scope(name, "matmul_diag_sparse", [A_diag, B]): - A_diag = ops.convert_to_tensor(A_diag) - if A_diag.shape.ndims != 1: - raise ValueError("A_diag must be a rank-1 Tensor.") - if B.indices.shape.ndims != 1 or B.values.shape.ndims != 2: - raise ValueError("B must represent a matrix. Found: %s." % B) - a = array_ops.gather(A_diag, B.indices) - a = array_ops.reshape(a, list(a.shape) + [1] * (B.values.shape.ndims - 1)) - return ops.IndexedSlices(a * B.values, B.indices, dense_shape=B.dense_shape) - - -class PartitionedTensor(object): - """A Tensor partitioned across its 0-th dimension.""" - - def __init__(self, tensors): - """Initializes PartitionedTensor. - - Args: - tensors: List of Tensors. All Tensors must agree on shape (excepting - batch dimension) and dtype. - - Raises: - ValueError: If 'tensors' has length zero. - ValueError: if contents of 'tensors' don't agree on shape or dtype. 
- """ - if not tensors: - raise ValueError("tensors must be a list of 1+ Tensors.") - - dtype = tensors[0].dtype - if not all(tensor.dtype == dtype for tensor in tensors): - raise ValueError("all tensors must have dtype = %s." % dtype) - - shape = tensors[0].shape[1:] - if not all(tensor.shape[1:] == shape for tensor in tensors): - raise ValueError("All tensors must have shape = %s (excluding batch " - "dimension)." % shape) - - self.tensors = tensors - self._concats = {} # {device: Tensor} - - @property - def shape(self): - feature_shape = self.tensors[0].shape[1:] - batch_size = sum([tensor.shape[0] for tensor in self.tensors], - tensor_shape.Dimension(0)) - return tensor_shape.TensorShape([batch_size]).concatenate(feature_shape) - - def get_shape(self): - return self.shape - - @property - def dtype(self): - return self.tensors[0].dtype - - def __str__(self): - return "PartitionedTensor([%s, ...], dtype=%s, shape=%s)" % ( - self.tensors[0].name, self.dtype.name, tuple(self.shape.as_list())) - - def __hash__(self): - return hash(tuple(self.tensors)) - - def __eq__(self, other): - if not isinstance(other, PartitionedTensor): - return False - return self.tensors == other.tensors - - def __ne__(self, other): - return not self == other # pylint: disable=g-comparison-negation - - def __getitem__(self, key): - return self.as_tensor()[key] - - def as_tensor(self, dtype=None, name=None, as_ref=False): - with ops.name_scope(name, "PartitionedTensor.as_tensor", self.tensors): - assert not as_ref - assert dtype in [None, self.dtype] - result = array_ops.concat(self.tensors, axis=0) - - # Cache 'result' if we haven't already cached a value for this device. - if result.device not in self._concats: - self._concats[result.device] = result - return self._concats[result.device] - - @property - def device(self): - # PartitionedTensors in general do not live on a single device. If the - # device cannot be determined unambiguously this property will return None. 
- device = self.tensors[0].device - if all(tensor.device == device for tensor in self.tensors): - return device - return None - - -ops.register_tensor_conversion_function( - PartitionedTensor, - lambda val, dtype, name, as_ref: val.as_tensor(dtype, name, as_ref)) - - -# TODO(b/69623235): Add a function for finding tensors that share gradients -# to eliminate redundant fisher factor computations. diff --git a/tensorflow/contrib/kfac/python/ops/utils_lib.py b/tensorflow/contrib/kfac/python/ops/utils_lib.py deleted file mode 100644 index 330d222dbf..0000000000 --- a/tensorflow/contrib/kfac/python/ops/utils_lib.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Utility functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.kfac.python.ops.utils import * -from tensorflow.python.util.all_util import remove_undocumented -# pylint: enable=unused-import,line-too-long,wildcard-import - -_allowed_symbols = [ - "set_global_constants", - "SequenceDict", - "tensors_to_column", - "column_to_tensors", - "kronecker_product", - "layer_params_to_mat2d", - "mat2d_to_layer_params", - "posdef_inv", - "posdef_inv_matrix_inverse", - "posdef_inv_cholesky", - "posdef_inv_funcs", - "SubGraph", - "generate_random_signs", - "fwd_gradients", - "ensure_sequence", - "batch_execute", - "extract_convolution_patches", - "extract_pointwise_conv2d_patches", - "is_data_format_channel_last", - "matmul_sparse_dense", - "matmul_diag_sparse", -] - -remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) -- GitLab From 2325b1e1979694de07439fae7b4585eb6ed4f99a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Aug 2018 12:56:31 -0700 Subject: [PATCH 069/380] [XLA] Cleanup Alltoall. - Remove unused field 'cross_replica_sum_barrier' for Alltoall. - Update cost analysis. There's no computation in Alltoall. - Cleanup stale TODOs. 
PiperOrigin-RevId: 209814190 --- tensorflow/compiler/xla/client/xla_builder.h | 4 ---- .../compiler/xla/service/hlo_cost_analysis.cc | 9 --------- .../compiler/xla/service/hlo_instruction.cc | 8 +++----- .../compiler/xla/service/hlo_instruction.h | 5 +---- .../compiler/xla/service/hlo_instructions.cc | 19 +++++-------------- .../compiler/xla/service/hlo_instructions.h | 14 +------------- tensorflow/compiler/xla/service/hlo_parser.cc | 5 ++--- .../compiler/xla/service/hlo_parser_test.cc | 2 +- 8 files changed, 13 insertions(+), 53 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index adb62f5f02..3dbf1e5bee 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -711,8 +711,6 @@ class XlaBuilder { const absl::optional& channel_id = absl::nullopt); // Enqueues an operation that do an Alltoall of the operand cross cores. - // - // TODO(b/110096724): This is NOT YET ready to use. XlaOp AllToAll(const XlaOp& operand, int64 split_dimension, int64 concat_dimension, int64 split_count, const std::vector& replica_groups); @@ -1859,8 +1857,6 @@ XlaOp CrossReplicaSum( const absl::optional& channel_id = absl::nullopt); // Enqueues an operation that do an Alltoall of the operand cross cores. -// -// TODO(b/110096724): This is NOT YET ready to use. 
XlaOp AllToAll(const XlaOp& operand, int64 split_dimension, int64 concat_dimension, int64 split_count, const std::vector& replica_groups = {}); diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 3e68f59bd9..5add4251ef 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -540,15 +540,6 @@ Status HloCostAnalysis::HandleCrossReplicaSum(const HloInstruction* crs) { } Status HloCostAnalysis::HandleAllToAll(const HloInstruction* hlo) { - // TODO(b/110096724): Compute correct cost here. - double flops = 0.0; - ShapeUtil::ForEachSubshape(hlo->shape(), - [&](const Shape& subshape, const ShapeIndex&) { - if (ShapeUtil::IsArray(subshape)) { - flops += ShapeUtil::ElementsIn(subshape); - } - }); - current_properties_[kFlopsKey] = flops; return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index a211167519..cf1845c8fe 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -314,8 +314,7 @@ StatusOr> HloInstruction::CreateFromProto( proto.shape(), all_operands(), /*replica_groups=*/ std::vector(proto.replica_groups().begin(), - proto.replica_groups().end()), - /*barrier=*/proto.cross_replica_sum_barrier()); + proto.replica_groups().end())); break; } case HloOpcode::kConvolution: @@ -675,10 +674,9 @@ HloInstruction::CreateCrossReplicaSum( /* static */ std::unique_ptr HloInstruction::CreateAllToAll( const Shape& shape, tensorflow::gtl::ArraySlice operands, - const std::vector& replica_groups, - tensorflow::StringPiece barrier) { + const std::vector& replica_groups) { return absl::make_unique(shape, operands, - replica_groups, barrier); + replica_groups); } /* static */ std::unique_ptr HloInstruction::CreateInfeed( diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h 
b/tensorflow/compiler/xla/service/hlo_instruction.h index fdd34544eb..121a9e55f6 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -466,12 +466,9 @@ class HloInstruction { // within replica 1, 2, 3, and in the gather phase, the received blocks will // be concatenated in the order of 1, 2, 3; another Alltoall will be applied // within replica 4, 5, 0, and the concatenation order is 4, 5, 0. - // - // TODO(b/110096724): This is NOT YET ready to use. static std::unique_ptr CreateAllToAll( const Shape& shape, tensorflow::gtl::ArraySlice operands, - const std::vector& replica_groups, - tensorflow::StringPiece barrier); + const std::vector& replica_groups); // Creates a conversion instruction, where operand is the data to convert and // shape is the target shape for the conversion. diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 36fac4a266..345ca0053a 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -371,11 +371,9 @@ HloAllReduceInstruction::CloneWithNewOperandsImpl( HloAllToAllInstruction::HloAllToAllInstruction( const Shape& shape, tensorflow::gtl::ArraySlice operands, - const std::vector& replica_groups, - tensorflow::StringPiece barrier) + const std::vector& replica_groups) : HloInstruction(HloOpcode::kAllToAll, shape), - replica_groups_(replica_groups), - cross_replica_sum_barrier_(barrier.begin(), barrier.end()) { + replica_groups_(replica_groups) { for (auto operand : operands) { AppendOperand(operand); } @@ -390,9 +388,7 @@ bool HloAllToAllInstruction::IdenticalSlowPath( [](const ReplicaGroup& a, const ReplicaGroup& b) { return ContainersEqual(a.replica_ids(), b.replica_ids()); - }) && - cross_replica_sum_barrier() == - casted_other.cross_replica_sum_barrier(); + }); } std::unique_ptr @@ -400,8 +396,8 @@ 
HloAllToAllInstruction::CloneWithNewOperandsImpl( const Shape& shape, tensorflow::gtl::ArraySlice new_operands, HloCloneContext* /*context*/) const { - return absl::make_unique( - shape, new_operands, replica_groups(), cross_replica_sum_barrier()); + return absl::make_unique(shape, new_operands, + replica_groups()); } std::vector HloAllToAllInstruction::ExtraAttributesToStringImpl( @@ -415,10 +411,6 @@ std::vector HloAllToAllInstruction::ExtraAttributesToStringImpl( result.push_back( StrCat("replica_groups={", Join(replica_group_str, ","), "}")); - if (!cross_replica_sum_barrier().empty()) { - result.push_back(StrCat("barrier=\"", cross_replica_sum_barrier(), "\"")); - } - return result; } @@ -426,7 +418,6 @@ HloInstructionProto HloAllToAllInstruction::ToProto() const { HloInstructionProto proto = HloInstruction::ToProto(); *proto.mutable_replica_groups() = {replica_groups_.begin(), replica_groups_.end()}; - proto.set_cross_replica_sum_barrier(cross_replica_sum_barrier_); return proto; } diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 0a6a0c6233..755e560151 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -275,21 +275,12 @@ class HloAllToAllInstruction : public HloInstruction { public: explicit HloAllToAllInstruction( const Shape& shape, tensorflow::gtl::ArraySlice operand, - const std::vector& replica_groups, - tensorflow::StringPiece barrier); + const std::vector& replica_groups); const std::vector& replica_groups() const { return replica_groups_; } - // TODO(b/110096724): rename this. 
- void set_cross_replica_sum_barrier(string barrier) { - cross_replica_sum_barrier_ = barrier; - } - string cross_replica_sum_barrier() const { - return cross_replica_sum_barrier_; - } - HloInstructionProto ToProto() const override; private: @@ -307,9 +298,6 @@ class HloAllToAllInstruction : public HloInstruction { HloCloneContext* context) const override; std::vector replica_groups_; - - // The string representation of the barrier config. - string cross_replica_sum_barrier_; }; class HloReverseInstruction : public HloInstruction { diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index beef96476c..90a493d29f 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -681,7 +681,6 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, optional barrier; attrs["replica_groups"] = {/*required=*/false, AttrTy::kBracedInt64ListList, &tmp_groups}; - attrs["barrier"] = {/*required=*/false, AttrTy::kString, &barrier}; if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } @@ -689,8 +688,8 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (tmp_groups) { replica_groups = CreateReplicaGroups(*tmp_groups); } - instruction = builder->AddInstruction(HloInstruction::CreateAllToAll( - shape, operands, replica_groups, barrier ? 
*barrier : "")); + instruction = builder->AddInstruction( + HloInstruction::CreateAllToAll(shape, operands, replica_groups)); break; } case HloOpcode::kReshape: { diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index f52cfadb81..f310b36bfb 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -1094,7 +1094,7 @@ R"(HloModule AllToAllWithSubgroups ENTRY AllToAllWithSubgroups { input = f32[128,32]{0,1} parameter(0) - ROOT a2a = f32[128,32]{0,1} all-to-all(input), replica_groups={{1,2},{3,0}}, barrier="abc" + ROOT a2a = f32[128,32]{0,1} all-to-all(input), replica_groups={{1,2},{3,0}} } )" -- GitLab From d45904a6165857a58ce5a82a20bdd38a5d00c5ec Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Aug 2018 13:01:31 -0700 Subject: [PATCH 070/380] Fix typo: missing ' ' in error message. PiperOrigin-RevId: 209815036 --- tensorflow/python/estimator/canned/head.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index da9a64c2bc..06593f9520 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -335,8 +335,8 @@ def _check_dense_labels_match_logits_and_reshape( 'Expected labels dimension=%s. Received %s. ' 'Suggested Fix:' 'If your classifier expects one-hot encoding label,' - 'check your n_classes argument to the estimator' - 'and/or the shape of your label.' + 'check your n_classes argument to the estimator ' + 'and/or the shape of your label. ' 'Otherwise, check the shape of your label.' % (expected_labels_dimension, dim1)) expected_labels_shape = array_ops.concat( -- GitLab From 6448db09a76e2803c1a46a7514c4bf82dd7d6261 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 22 Aug 2018 13:19:01 -0700 Subject: [PATCH 071/380] Windows: Fix pip package installation in GPU build The GPU pip package name has been renamed to tensorflow_gpu-*.whl PiperOrigin-RevId: 209817853 --- tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index 7ac07872e9..f31b0a64e0 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -120,7 +120,7 @@ if [[ "$TF_NIGHTLY" == 1 ]]; then fi # Running python tests on Windows needs pip package installed -PIP_NAME=$(ls ${PY_TEST_DIR}/tensorflow-*.whl) +PIP_NAME=$(ls ${PY_TEST_DIR}/tensorflow_gpu-*.whl) reinstall_tensorflow_pip ${PIP_NAME} TF_GPU_COUNT=${TF_GPU_COUNT:-8} -- GitLab From 54dfc4c499df11facc8aaa141616422db504cc5c Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 22 Aug 2018 13:26:34 -0700 Subject: [PATCH 072/380] Name assembler Dockerfile --- tensorflow/tools/dockerfiles/README.md | 2 +- .../tools/dockerfiles/{Dockerfile => assembler.Dockerfile} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tensorflow/tools/dockerfiles/{Dockerfile => assembler.Dockerfile} (100%) diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md index ea80e9feaa..c484c162cb 100644 --- a/tensorflow/tools/dockerfiles/README.md +++ b/tensorflow/tools/dockerfiles/README.md @@ -57,7 +57,7 @@ You can use the `Dockerfile` in this directory to build an editing environment that has all of the Python dependencies you'll need: ```bash -$ docker build -t tf-assembler . +$ docker build -t tf-assembler -f assembler.Dockerfile . 
# Set --user to set correct permissions on generated files $ docker run --user $(id -u):$(id -g) -it -v $(pwd):/tf tf-assembler bash diff --git a/tensorflow/tools/dockerfiles/Dockerfile b/tensorflow/tools/dockerfiles/assembler.Dockerfile similarity index 100% rename from tensorflow/tools/dockerfiles/Dockerfile rename to tensorflow/tools/dockerfiles/assembler.Dockerfile -- GitLab From a3960a0eeffac9467a5b3d4b525007d83ae137de Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Aug 2018 13:24:25 -0700 Subject: [PATCH 073/380] [SE] Avoid deadlock by calling HostCallbacks even when the stream is in an error state HostCallbacks may trigger notifications that, if elided, would cause programs to hang. Ideally we would have errback semantics, but this is a band-aid while the semantics are redefined. PiperOrigin-RevId: 209818770 --- tensorflow/stream_executor/stream.cc | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 3695c839b5..19d3b2389a 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -5287,12 +5287,11 @@ Stream &Stream::ThenTransformTensor(const dnn::BatchDescriptor &input_desc, Stream &Stream::ThenDoHostCallback(std::function callback) { VLOG_CALL(PARAM(callback)); - if (ok()) { - CheckError(parent_->HostCallback(this, callback)); - } else { + if (!ok()) { LOG(INFO) << DebugStreamPointers() << " was in error state before adding host callback"; } + CheckError(parent_->HostCallback(this, std::move(callback))); return *this; } @@ -5300,12 +5299,11 @@ Stream &Stream::ThenDoHostCallbackWithStatus( std::function callback) { VLOG_CALL(PARAM(callback)); - if (ok()) { - CheckError(parent_->HostCallback(this, std::move(callback))); - } else { - LOG(WARNING) << "stream " << DebugStreamPointers() - << " was in error state before adding host callback"; + if (!ok()) { + LOG(INFO) << DebugStreamPointers() + 
<< " was in error state before adding host callback"; } + CheckError(parent_->HostCallback(this, std::move(callback))); return *this; } -- GitLab From 0454c5051d7b8b85699065d9528a1633587db994 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Wed, 22 Aug 2018 13:27:04 -0700 Subject: [PATCH 074/380] [TF:XLA] Use unique_id_ to compare computations for equality quickly. PiperOrigin-RevId: 209819199 --- tensorflow/compiler/xla/service/hlo_computation.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 70b18ff356..4c036ea1bf 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -682,6 +682,9 @@ ProgramShape HloComputation::ComputeProgramShape() const { } bool HloComputation::operator==(const HloComputation& other) const { + if (this == &other) { + return true; + } std::set> visited; std::function eq = [&visited, &eq](const HloInstruction* a, const HloInstruction* b) { -- GitLab From 6b318b9e4495aaff3601acb1c0f8750070c6b4a9 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Wed, 22 Aug 2018 13:30:24 -0700 Subject: [PATCH 075/380] [tf.data] Cosmetic changes to tf.data optimizers: - Use graph_utils::GetInputNode in optimizers - Moved python optimization tests into their own files from optimize_dataset_op_test PiperOrigin-RevId: 209819734 --- .../contrib/data/python/kernel_tests/BUILD | 8 - .../python/kernel_tests/optimization/BUILD | 57 ++++ .../optimization/latency_all_edges_test.py | 58 ++++ .../map_and_filter_fusion_test.py | 166 ++++++++++ .../optimization/map_vectorization_test.py | 153 ++++++++++ .../kernel_tests/optimize_dataset_op_test.py | 288 ------------------ .../optimizers/data/map_and_batch_fusion.cc | 4 +- .../optimizers/data/map_and_filter_fusion.cc | 4 +- .../grappler/optimizers/data/map_fusion.cc | 3 +- .../optimizers/data/noop_elimination.cc | 3 +- 
.../data/shuffle_and_repeat_fusion.cc | 4 +- 11 files changed, 441 insertions(+), 307 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/optimization/BUILD create mode 100644 tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py create mode 100644 tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py create mode 100644 tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index cd46e382eb..220f1adf7f 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -229,17 +229,9 @@ py_test( srcs = ["optimize_dataset_op_test.py"], srcs_version = "PY2AND3", deps = [ - ":stats_dataset_test_base", - ":test_utils", "//tensorflow/contrib/data/python/ops:optimization", - "//tensorflow/contrib/data/python/ops:stats_ops", - "//tensorflow/python:check_ops", "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", "//tensorflow/python/data/ops:dataset_ops", "@absl_py//absl/testing:parameterized", ], diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD new file mode 100644 index 0000000000..f400af3201 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/BUILD @@ -0,0 +1,57 @@ +package(default_visibility = ["//tensorflow:internal"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "py_test") + +py_test( + name = "map_vectorization_test", + size = "small", + srcs = ["map_vectorization_test.py"], + srcs_version = "PY2AND3", + deps = [ + 
"//tensorflow/contrib/data/python/kernel_tests:test_utils", + "//tensorflow/contrib/data/python/ops:optimization", + "//tensorflow/python:check_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python/data/ops:dataset_ops", + "@absl_py//absl/testing:parameterized", + ], +) + +py_test( + name = "map_and_filter_fusion_test", + size = "small", + srcs = ["map_and_filter_fusion_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:optimization", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + "//tensorflow/python/data/ops:dataset_ops", + "@absl_py//absl/testing:parameterized", + ], +) + +py_test( + name = "latency_all_edges_test", + size = "small", + srcs = ["latency_all_edges_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/kernel_tests:stats_dataset_test_base", + "//tensorflow/contrib/data/python/ops:optimization", + "//tensorflow/contrib/data/python/ops:stats_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + "//tensorflow/python/data/ops:dataset_ops", + ], +) diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py new file mode 100644 index 0000000000..1850b6921a --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/latency_all_edges_test.py @@ -0,0 +1,58 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the LatencyAllEdges optimization.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.kernel_tests import stats_dataset_test_base +from tensorflow.contrib.data.python.ops import optimization +from tensorflow.contrib.data.python.ops import stats_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import errors +from tensorflow.python.platform import test + + +class OptimizeStatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): + + def testLatencyStatsOptimization(self): + + stats_aggregator = stats_ops.StatsAggregator() + dataset = dataset_ops.Dataset.from_tensors(1).apply( + optimization.assert_next( + ["LatencyStats", "Map", "LatencyStats", "Prefetch", + "LatencyStats"])).map(lambda x: x * x).prefetch(1).apply( + optimization.optimize(["latency_all_edges"])).apply( + stats_ops.set_stats_aggregator(stats_aggregator)) + iterator = dataset.make_initializable_iterator() + get_next = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run(iterator.initializer) + self.assertEqual(1 * 1, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + summary_str = sess.run(summary_t) + self._assertSummaryHasCount(summary_str, + "record_latency_TensorDataset/_1", 1) + self._assertSummaryHasCount(summary_str, 
"record_latency_MapDataset/_4", + 1) + self._assertSummaryHasCount(summary_str, + "record_latency_PrefetchDataset/_6", 1) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py new file mode 100644 index 0000000000..2d8a4a583d --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_and_filter_fusion_test.py @@ -0,0 +1,166 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the MapAndFilterFusion optimization.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.contrib.data.python.ops import optimization +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class MapAndFilterFusionTest(test.TestCase, parameterized.TestCase): + + @staticmethod + def map_functions(): + identity = lambda x: x + increment = lambda x: x + 1 + + def increment_and_square(x): + y = x + 1 + return y * y + + functions = [identity, increment, increment_and_square] + tests = [] + for i, fun1 in enumerate(functions): + for j, fun2 in enumerate(functions): + tests.append(( + "test_{}_{}".format(i, j), + [fun1, fun2], + )) + for k, fun3 in enumerate(functions): + tests.append(( + "test_{}_{}_{}".format(i, j, k), + [fun1, fun2, fun3], + )) + + swap = lambda x, n: (n, x) + tests.append(( + "swap1", + [lambda x: (x, 42), swap], + )) + tests.append(( + "swap2", + [lambda x: (x, 42), swap, swap], + )) + return tuple(tests) + + @parameterized.named_parameters(*map_functions.__func__()) + def testMapFusion(self, functions): + dataset = dataset_ops.Dataset.range(5).apply( + optimization.assert_next(["Map", "Prefetch"])) + for function in functions: + dataset = dataset.map(function) + + dataset = dataset.prefetch(0).apply(optimization.optimize(["map_fusion"])) + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + with self.test_session() as sess: + for x in range(5): + result = sess.run(get_next) + r = x + for function in functions: + if isinstance(r, tuple): + r = function(*r) # Pass tuple as 
multiple arguments. + else: + r = function(r) + self.assertAllEqual(r, result) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + @staticmethod + def map_and_filter_functions(): + identity = lambda x: x + increment = lambda x: x + 1 + minus_five = lambda x: x - 5 + + def increment_and_square(x): + y = x + 1 + return y * y + + take_all = lambda x: constant_op.constant(True) + is_zero = lambda x: math_ops.equal(x, 0) + is_odd = lambda x: math_ops.equal(x % 2, 0) + greater = lambda x: math_ops.greater(x + 5, 0) + + functions = [identity, increment, minus_five, increment_and_square] + filters = [take_all, is_zero, is_odd, greater] + tests = [] + + for x, fun in enumerate(functions): + for y, predicate in enumerate(filters): + tests.append(("mixed_{}_{}".format(x, y), fun, predicate)) + + # Multi output + tests.append(("multiOne", lambda x: (x, x), + lambda x, y: constant_op.constant(True))) + tests.append( + ("multiTwo", lambda x: (x, 2), + lambda x, y: math_ops.equal(x * math_ops.cast(y, dtypes.int64), 0))) + return tuple(tests) + + @parameterized.named_parameters(*map_and_filter_functions.__func__()) + def testMapFilterFusion(self, function, predicate): + dataset = dataset_ops.Dataset.range(10).apply( + optimization.assert_next( + ["Map", + "FilterByLastComponent"])).map(function).filter(predicate).apply( + optimization.optimize(["map_and_filter_fusion"])) + self._testMapAndFilter(dataset, function, predicate) + + def _testMapAndFilter(self, dataset, function, predicate): + iterator = dataset.make_one_shot_iterator() + get_next = iterator.get_next() + with self.test_session() as sess: + for x in range(10): + r = function(x) + if isinstance(r, tuple): + b = predicate(*r) # Pass tuple as multiple arguments. 
+ else: + b = predicate(r) + if sess.run(b): + result = sess.run(get_next) + self.assertAllEqual(r, result) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testAdditionalInputs(self): + a = constant_op.constant(3, dtype=dtypes.int64) + b = constant_op.constant(4, dtype=dtypes.int64) + some_tensor = math_ops.mul(a, b) + function = lambda x: x * x + + def predicate(y): + return math_ops.less(math_ops.cast(y, dtypes.int64), some_tensor) + + # We are currently not supporting functions with additional inputs. + dataset = dataset_ops.Dataset.range(10).apply( + optimization.assert_next( + ["Map", "Filter"])).map(function).filter(predicate).apply( + optimization.optimize(["map_and_filter_fusion"])) + + self._testMapAndFilter(dataset, function, predicate) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py new file mode 100644 index 0000000000..70ac13b1d5 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/optimization/map_vectorization_test.py @@ -0,0 +1,153 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the MapVectorization optimization.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.contrib.data.python.kernel_tests import test_utils +from tensorflow.contrib.data.python.ops import optimization +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.platform import test + + +class MapVectorizationTest(test_utils.DatasetTestBase, parameterized.TestCase): + + def _get_test_datasets(self, + base_dataset, + map_fn, + num_parallel_calls=None, + expect_optimized=True): + """Given base dataset and map fn, creates test datasets. + + Returns a tuple of (unoptimized, dataset, optimized dataset). The + unoptimized dataset has the assertion that Batch follows Map. The optimized + dataset has the assertion that Map follows Batch, and has the + "map_vectorization" optimization applied. + + Args: + base_dataset: Input dataset to map->batch + map_fn: Map function to use + num_parallel_calls: (Optional.) num_parallel_calls argument for map + expect_optimized: (Optional.) Whether we expect the optimization to take + place, in which case we will assert that Batch is followed by Map, + otherwise Map followed by Batch. Defaults to True. + + Returns: + Tuple of (unoptimized dataset, optimized dataset). 
+ """ + map_node_name = "Map" if num_parallel_calls is None else "ParallelMap" + batch_size = 100 + + def _make_dataset(node_names): + return base_dataset.apply(optimization.assert_next(node_names)).map( + map_fn, num_parallel_calls=num_parallel_calls).batch(batch_size) + + unoptimized = _make_dataset([map_node_name, "Batch"]) + optimized = _make_dataset(["Batch", map_node_name] if expect_optimized else + [map_node_name, "Batch"]).apply( + optimization.optimize(["map_vectorization"])) + + return unoptimized, optimized + + @parameterized.named_parameters( + ("Basic", lambda x: (x, x + 1), None), + ("Parallel", lambda x: (x, x + 1), 12), + ("Gather", lambda x: array_ops.gather(x, 0), 12), + ) + def testOptimization(self, map_fn, num_parallel_calls): + base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2], + [3, 4]]).repeat(5) + unoptimized, optimized = self._get_test_datasets(base_dataset, map_fn, + num_parallel_calls) + self._assert_datasets_equal(unoptimized, optimized) + + def testOptimizationBadMapFn(self): + # Test map functions that give an error + def map_fn(x): + # x has leading dimension 5, this will raise an error + return array_ops.gather(x, 10) + + base_dataset = dataset_ops.Dataset.range(5).repeat(5).batch( + 5, drop_remainder=True) + _, optimized = self._get_test_datasets(base_dataset, map_fn) + nxt = optimized.make_one_shot_iterator().get_next() + with self.assertRaisesRegexp(errors.InvalidArgumentError, + r"indices = 10 is not in \[0, 5\)"): + self.evaluate(nxt) + + def testOptimizationWithCapturedInputs(self): + # Tests that vectorization works with captured inputs + def map_fn(x): + return x + y + + y = constant_op.constant(1, shape=(2,)) + base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2], + [3, 4]]).repeat(5) + # TODO(rachelim): when this optimization works, turn on expect_optimized + unoptimized, optimized = self._get_test_datasets( + base_dataset, map_fn, expect_optimized=False) + self._assert_datasets_equal(optimized, 
unoptimized) + + def testOptimizationIgnoreStateful(self): + + def map_fn(x): + with ops.control_dependencies([check_ops.assert_equal(x, 0)]): + return array_ops.identity(x) + + base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2], + [3, 4]]).repeat(5) + _, optimized = self._get_test_datasets( + base_dataset, map_fn, expect_optimized=False) + nxt = optimized.make_one_shot_iterator().get_next() + + # NOTE: Right now, it raises an error because we can't save datasets that + # are stateful, and we rely on this saving mechanism to optimize datasets, + # so stateful functions can't be optimized. + with self.assertRaisesRegexp(errors.InvalidArgumentError, "[Ss]tateful"): + self.evaluate(nxt) + + def testOptimizationIgnoreRagged(self): + # Make sure we ignore inputs that might not be uniformly sized + def map_fn(x): + return array_ops.gather(x, 0) + + # output_shape = (?,) + base_dataset = dataset_ops.Dataset.range(20).batch(3, drop_remainder=False) + unoptimized, optimized = self._get_test_datasets( + base_dataset, map_fn, expect_optimized=False) + self._assert_datasets_equal(unoptimized, optimized) + + def testOptimizationIgnoreRaggedMap(self): + # Don't optimize when the output of the map fn shapes are unknown. 
+ def map_fn(x): + return array_ops.tile(x, x) + + base_dataset = dataset_ops.Dataset.range(20).batch(1, drop_remainder=True) + unoptimized, optimized = self._get_test_datasets( + base_dataset, map_fn, expect_optimized=False) + self._assert_datasets_raise_same_error(unoptimized, optimized, + errors.InvalidArgumentError) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/kernel_tests/optimize_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/optimize_dataset_op_test.py index 76aa1c3cfd..514adffa62 100644 --- a/tensorflow/contrib/data/python/kernel_tests/optimize_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/optimize_dataset_op_test.py @@ -19,18 +19,9 @@ from __future__ import print_function from absl.testing import parameterized -from tensorflow.contrib.data.python.kernel_tests import stats_dataset_test_base -from tensorflow.contrib.data.python.kernel_tests import test_utils from tensorflow.contrib.data.python.ops import optimization -from tensorflow.contrib.data.python.ops import stats_ops from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -120,285 +111,6 @@ class OptimizeDatasetTest(test.TestCase, parameterized.TestCase): "Function .* is not defined."): sess.run(get_next) - @staticmethod - def map_functions(): - identity = lambda x: x - increment = lambda x: x + 1 - - def increment_and_square(x): - y = x + 1 - return y * y - - functions = [identity, increment, increment_and_square] - tests = [] - for i, fun1 in enumerate(functions): - for j, fun2 in enumerate(functions): - tests.append(( - "test_{}_{}".format(i, j), - 
[fun1, fun2], - )) - for k, fun3 in enumerate(functions): - tests.append(( - "test_{}_{}_{}".format(i, j, k), - [fun1, fun2, fun3], - )) - - swap = lambda x, n: (n, x) - tests.append(( - "swap1", - [lambda x: (x, 42), swap], - )) - tests.append(( - "swap2", - [lambda x: (x, 42), swap, swap], - )) - return tuple(tests) - - @parameterized.named_parameters(*map_functions.__func__()) - def testMapFusion(self, functions): - dataset = dataset_ops.Dataset.range(5).apply( - optimization.assert_next(["Map", "Prefetch"])) - for function in functions: - dataset = dataset.map(function) - - dataset = dataset.prefetch(0).apply(optimization.optimize(["map_fusion"])) - iterator = dataset.make_one_shot_iterator() - get_next = iterator.get_next() - with self.test_session() as sess: - for x in range(5): - result = sess.run(get_next) - r = x - for function in functions: - if isinstance(r, tuple): - r = function(*r) # Pass tuple as multiple arguments. - else: - r = function(r) - self.assertAllEqual(r, result) - - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - @staticmethod - def map_and_filter_functions(): - identity = lambda x: x - increment = lambda x: x + 1 - minus_five = lambda x: x - 5 - - def increment_and_square(x): - y = x + 1 - return y * y - - take_all = lambda x: constant_op.constant(True) - is_zero = lambda x: math_ops.equal(x, 0) - is_odd = lambda x: math_ops.equal(x % 2, 0) - greater = lambda x: math_ops.greater(x + 5, 0) - - functions = [identity, increment, minus_five, increment_and_square] - filters = [take_all, is_zero, is_odd, greater] - tests = [] - - for x, fun in enumerate(functions): - for y, predicate in enumerate(filters): - tests.append(("mixed_{}_{}".format(x, y), fun, predicate)) - - # Multi output - tests.append(("multiOne", lambda x: (x, x), - lambda x, y: constant_op.constant(True))) - tests.append( - ("multiTwo", lambda x: (x, 2), - lambda x, y: math_ops.equal(x * math_ops.cast(y, dtypes.int64), 0))) - return tuple(tests) - - 
@parameterized.named_parameters(*map_and_filter_functions.__func__()) - def testMapFilterFusion(self, function, predicate): - dataset = dataset_ops.Dataset.range(10).apply( - optimization.assert_next( - ["Map", - "FilterByLastComponent"])).map(function).filter(predicate).apply( - optimization.optimize(["map_and_filter_fusion"])) - self._testMapAndFilter(dataset, function, predicate) - - def _testMapAndFilter(self, dataset, function, predicate): - iterator = dataset.make_one_shot_iterator() - get_next = iterator.get_next() - with self.test_session() as sess: - for x in range(10): - r = function(x) - if isinstance(r, tuple): - b = predicate(*r) # Pass tuple as multiple arguments. - else: - b = predicate(r) - if sess.run(b): - result = sess.run(get_next) - self.assertAllEqual(r, result) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testAdditionalInputs(self): - a = constant_op.constant(3, dtype=dtypes.int64) - b = constant_op.constant(4, dtype=dtypes.int64) - some_tensor = math_ops.mul(a, b) - function = lambda x: x * x - - def predicate(y): - return math_ops.less(math_ops.cast(y, dtypes.int64), some_tensor) - - # We are currently not supporting functions with additional inputs. 
- dataset = dataset_ops.Dataset.range(10).apply( - optimization.assert_next( - ["Map", "Filter"])).map(function).filter(predicate).apply( - optimization.optimize(["map_and_filter_fusion"])) - - self._testMapAndFilter(dataset, function, predicate) - - -class OptimizeStatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase): - - def testLatencyStatsOptimization(self): - - stats_aggregator = stats_ops.StatsAggregator() - dataset = dataset_ops.Dataset.from_tensors(1).apply( - optimization.assert_next( - ["LatencyStats", "Map", "LatencyStats", "Prefetch", - "LatencyStats"])).map(lambda x: x * x).prefetch(1).apply( - optimization.optimize(["latency_all_edges"])).apply( - stats_ops.set_stats_aggregator(stats_aggregator)) - iterator = dataset.make_initializable_iterator() - get_next = iterator.get_next() - summary_t = stats_aggregator.get_summary() - - with self.test_session() as sess: - sess.run(iterator.initializer) - self.assertEqual(1 * 1, sess.run(get_next)) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - summary_str = sess.run(summary_t) - self._assertSummaryHasCount(summary_str, - "record_latency_TensorDataset/_1", 1) - self._assertSummaryHasCount(summary_str, "record_latency_MapDataset/_4", - 1) - self._assertSummaryHasCount(summary_str, - "record_latency_PrefetchDataset/_6", 1) - - -class MapVectorizationTest(test_utils.DatasetTestBase, parameterized.TestCase): - - def _get_test_datasets(self, - base_dataset, - map_fn, - num_parallel_calls=None, - expect_optimized=True): - """Given base dataset and map fn, creates test datasets. - - Returns a tuple of (unoptimized, dataset, optimized dataset). The - unoptimized dataset has the assertion that Batch follows Map. The optimized - dataset has the assertion that Map follows Batch, and has the - "map_vectorization" optimization applied. - - Args: - base_dataset: Input dataset to map->batch - map_fn: Map function to use - num_parallel_calls: (Optional.) 
num_parallel_calls argument for map - expect_optimized: (Optional.) Whether we expect the optimization to take - place, in which case we will assert that Batch is followed by Map, - otherwise Map followed by Batch. Defaults to True. - - Returns: - Tuple of (unoptimized dataset, optimized dataset). - """ - map_node_name = "Map" if num_parallel_calls is None else "ParallelMap" - batch_size = 100 - - def _make_dataset(node_names): - return base_dataset.apply(optimization.assert_next(node_names)).map( - map_fn, num_parallel_calls=num_parallel_calls).batch(batch_size) - - unoptimized = _make_dataset([map_node_name, "Batch"]) - optimized = _make_dataset(["Batch", map_node_name] if expect_optimized else - [map_node_name, "Batch"]).apply( - optimization.optimize(["map_vectorization"])) - - return unoptimized, optimized - - @parameterized.named_parameters( - ("Basic", lambda x: (x, x + 1), None), - ("Parallel", lambda x: (x, x + 1), 12), - ("Gather", lambda x: array_ops.gather(x, 0), 12), - ) - def testOptimization(self, map_fn, num_parallel_calls): - base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2], - [3, 4]]).repeat(5) - unoptimized, optimized = self._get_test_datasets(base_dataset, map_fn, - num_parallel_calls) - self._assert_datasets_equal(unoptimized, optimized) - - def testOptimizationBadMapFn(self): - # Test map functions that give an error - def map_fn(x): - # x has leading dimension 5, this will raise an error - return array_ops.gather(x, 10) - - base_dataset = dataset_ops.Dataset.range(5).repeat(5).batch( - 5, drop_remainder=True) - _, optimized = self._get_test_datasets(base_dataset, map_fn) - nxt = optimized.make_one_shot_iterator().get_next() - with self.assertRaisesRegexp(errors.InvalidArgumentError, - r"indices = 10 is not in \[0, 5\)"): - self.evaluate(nxt) - - def testOptimizationWithCapturedInputs(self): - # Tests that vectorization works with captured inputs - def map_fn(x): - return x + y - - y = constant_op.constant(1, shape=(2,)) - 
base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2], - [3, 4]]).repeat(5) - # TODO(rachelim): when this optimization works, turn on expect_optimized - unoptimized, optimized = self._get_test_datasets( - base_dataset, map_fn, expect_optimized=False) - self._assert_datasets_equal(optimized, unoptimized) - - def testOptimizationIgnoreStateful(self): - - def map_fn(x): - with ops.control_dependencies([check_ops.assert_equal(x, 0)]): - return array_ops.identity(x) - - base_dataset = dataset_ops.Dataset.from_tensor_slices([[1, 2], - [3, 4]]).repeat(5) - _, optimized = self._get_test_datasets( - base_dataset, map_fn, expect_optimized=False) - nxt = optimized.make_one_shot_iterator().get_next() - - # NOTE: Right now, it raises an error because we can't save datasets that - # are stateful, and we rely on this saving mechanism to optimize datasets, - # so stateful functions can't be optimized. - with self.assertRaisesRegexp(errors.InvalidArgumentError, "[Ss]tateful"): - self.evaluate(nxt) - - def testOptimizationIgnoreRagged(self): - # Make sure we ignore inputs that might not be uniformly sized - def map_fn(x): - return array_ops.gather(x, 0) - - # output_shape = (?,) - base_dataset = dataset_ops.Dataset.range(20).batch(3, drop_remainder=False) - unoptimized, optimized = self._get_test_datasets( - base_dataset, map_fn, expect_optimized=False) - self._assert_datasets_equal(unoptimized, optimized) - - def testOptimizationIgnoreRaggedMap(self): - # Don't optimize when the output of the map fn shapes are unknown. 
- def map_fn(x): - return array_ops.tile(x, x) - - base_dataset = dataset_ops.Dataset.range(20).batch(1, drop_remainder=True) - unoptimized, optimized = self._get_test_datasets( - base_dataset, map_fn, expect_optimized=False) - self._assert_datasets_raise_same_error(unoptimized, optimized, - errors.InvalidArgumentError) - if __name__ == "__main__": test.main() diff --git a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc index 3ce238a30a..e9ad6f1b8a 100644 --- a/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc +++ b/tensorflow/core/grappler/optimizers/data/map_and_batch_fusion.cc @@ -104,8 +104,8 @@ Status MapAndBatchFusion::Optimize(Cluster* cluster, const GrapplerItem& item, // Use a more descriptive variable name now that we know the node type. const NodeDef& batch_node = node; - GraphView::InputPort input_port = graph.GetInputPort(batch_node.name(), 0); - NodeDef* node2 = graph.GetRegularFanin(input_port).node; + NodeDef* node2 = graph_utils::GetInputNode(batch_node, graph); + if (node2->op() != "MapDataset" && node2->op() != "ParallelMapDataset") { continue; } diff --git a/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion.cc b/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion.cc index 5e76c9f819..a411e641f7 100644 --- a/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion.cc +++ b/tensorflow/core/grappler/optimizers/data/map_and_filter_fusion.cc @@ -128,10 +128,8 @@ Status MapAndFilterFusion::Optimize(Cluster* cluster, const GrapplerItem& item, const NodeDef* filter_node = get_filter_node(node); if (!filter_node) continue; - GraphView::InputPort input_port = - graph.GetInputPort(filter_node->name(), 0); const NodeDef* map_node = - get_map_node(*graph.GetRegularFanin(input_port).node); + get_map_node(*graph_utils::GetInputNode(*filter_node, graph)); if (!map_node) continue; const auto* fused_function = 
make_fused_function(map_node, filter_node); diff --git a/tensorflow/core/grappler/optimizers/data/map_fusion.cc b/tensorflow/core/grappler/optimizers/data/map_fusion.cc index feb370eb9d..dc0509f7a5 100644 --- a/tensorflow/core/grappler/optimizers/data/map_fusion.cc +++ b/tensorflow/core/grappler/optimizers/data/map_fusion.cc @@ -102,9 +102,8 @@ Status MapFusion::Optimize(Cluster* cluster, const GrapplerItem& item, const NodeDef* map_node = get_map_node(node); if (!map_node) continue; - GraphView::InputPort input_port = graph.GetInputPort(map_node->name(), 0); const NodeDef* parent_map_node = - get_map_node(*graph.GetRegularFanin(input_port).node); + get_map_node(*graph_utils::GetInputNode(*map_node, graph)); if (!parent_map_node) continue; const auto* fused_function = get_fused_function(parent_map_node, map_node); diff --git a/tensorflow/core/grappler/optimizers/data/noop_elimination.cc b/tensorflow/core/grappler/optimizers/data/noop_elimination.cc index 55d57b3b97..a26f1000a3 100644 --- a/tensorflow/core/grappler/optimizers/data/noop_elimination.cc +++ b/tensorflow/core/grappler/optimizers/data/noop_elimination.cc @@ -69,8 +69,7 @@ Status NoOpElimination::Optimize(Cluster* cluster, const GrapplerItem& item, for (const NodeDef& node : item.graph.node()) { if (!IsNoOp(node, graph)) continue; - GraphView::InputPort input_port = graph.GetInputPort(node.name(), 0); - NodeDef* const parent = graph.GetRegularFanin(input_port).node; + NodeDef* const parent = graph_utils::GetInputNode(node, graph); graph.ReplaceInput(node, *parent); nodes_to_delete.insert(node.name()); diff --git a/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc index 7c7161c5b2..cb0ff670e8 100644 --- a/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc +++ b/tensorflow/core/grappler/optimizers/data/shuffle_and_repeat_fusion.cc @@ -76,8 +76,8 @@ Status ShuffleAndRepeatFusion::Optimize(Cluster* 
cluster, // Use a more descriptive variable name now that we know the node type. const NodeDef& repeat_node = node; - GraphView::InputPort input_port = graph.GetInputPort(repeat_node.name(), 0); - NodeDef* node2 = graph.GetRegularFanin(input_port).node; + NodeDef* node2 = graph_utils::GetInputNode(repeat_node, graph); + if (node2->op() != "ShuffleDataset") { continue; } -- GitLab From 4f7a169a7eb97ea4819217f14705d6c2bd125355 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 22 Aug 2018 13:38:15 -0700 Subject: [PATCH 076/380] [XLA] Define integer division overflow for CPU/GPU This implements the semantics suggested in cr/209704042 for CPU and GPU: X / 0 == -1 X % 0 == X INT_SMIN / -1 = INT_SMIN INT_SMIN % -1 = 0 PiperOrigin-RevId: 209821097 --- .../xla/service/elemental_ir_emitter.cc | 95 ++++++++++++++++++- .../xla/service/elemental_ir_emitter.h | 15 +++ .../xla/tests/array_elementwise_ops_test.cc | 6 +- .../performance/xla/operation_semantics.md | 4 + 4 files changed, 112 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 4b19aa5df9..215af562a5 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -1100,6 +1100,95 @@ static llvm::Value* SaturateShiftIfNecessary(llvm::IRBuilder<>* b, return b->CreateSelect(shift_amt_in_range, shift_result, saturated_value); } +llvm::Value* ElementalIrEmitter::GetOne(llvm::Type* type) const { + return llvm::ConstantInt::get(llvm::cast(type), 1); +} + +llvm::Value* ElementalIrEmitter::GetZero(llvm::Type* type) const { + return llvm::ConstantInt::get(llvm::cast(type), 0); +} + +llvm::Value* ElementalIrEmitter::GetIntSMin(llvm::Type* type) const { + auto* integer_type = llvm::cast(type); + return llvm::ConstantInt::get(integer_type, llvm::APInt::getSignedMinValue( + integer_type->getBitWidth())); +} + +llvm::Value* 
ElementalIrEmitter::GetMinusOne(llvm::Type* type) const { + auto* integer_type = llvm::cast(type); + return llvm::ConstantInt::get( + integer_type, llvm::APInt::getAllOnesValue(integer_type->getBitWidth())); +} + +llvm::Value* ElementalIrEmitter::IsZero(llvm::Value* v) const { + return b_->CreateICmpEQ(v, llvm::ConstantInt::get(v->getType(), 0)); +} + +llvm::Value* ElementalIrEmitter::IsIntMinDivisionOverflow( + llvm::Value* lhs, llvm::Value* rhs) const { + return b_->CreateAnd(b_->CreateICmpEQ(lhs, GetIntSMin(lhs->getType())), + b_->CreateICmpEQ(rhs, GetMinusOne(rhs->getType()))); +} + +llvm::Value* ElementalIrEmitter::Select(llvm::Value* cond, llvm::Value* if_true, + llvm::Value* if_false) const { + return b_->CreateSelect(cond, if_true, if_false); +} + +llvm::Value* ElementalIrEmitter::EmitIntegerDivide(llvm::Value* lhs, + llvm::Value* rhs, + bool is_signed) const { + // Integer division overflow behavior: + // + // X / 0 == -1 + // INT_SMIN /s -1 = INT_SMIN + + if (!is_signed) { + llvm::Value* udiv_is_unsafe = IsZero(rhs); + llvm::Value* safe_rhs = Select(udiv_is_unsafe, GetOne(lhs->getType()), rhs); + llvm::Value* safe_div = b_->CreateUDiv(lhs, safe_rhs); + return Select(udiv_is_unsafe, GetMinusOne(lhs->getType()), safe_div); + } + + llvm::Value* has_zero_divisor = IsZero(rhs); + llvm::Value* has_int_min_overflow = IsIntMinDivisionOverflow(lhs, rhs); + llvm::Value* sdiv_is_unsafe = + b_->CreateOr(has_int_min_overflow, has_zero_divisor); + llvm::Value* safe_rhs = Select(sdiv_is_unsafe, GetOne(lhs->getType()), rhs); + llvm::Value* safe_div = b_->CreateSDiv(lhs, safe_rhs); + + return Select( + has_zero_divisor, GetMinusOne(lhs->getType()), + Select(has_int_min_overflow, GetIntSMin(lhs->getType()), safe_div)); +} + +llvm::Value* ElementalIrEmitter::EmitIntegerRemainder(llvm::Value* lhs, + llvm::Value* rhs, + bool is_signed) const { + // Integer remainder overflow behavior: + // + // X % 0 == X + // INT_SMIN %s -1 = 0 + + if (!is_signed) { + llvm::Value* 
urem_is_unsafe = IsZero(rhs); + llvm::Value* safe_rhs = Select(urem_is_unsafe, GetOne(lhs->getType()), rhs); + llvm::Value* safe_rem = b_->CreateURem(lhs, safe_rhs); + return Select(urem_is_unsafe, lhs, safe_rem); + } + + llvm::Value* has_zero_divisor = IsZero(rhs); + llvm::Value* has_int_min_overflow = IsIntMinDivisionOverflow(lhs, rhs); + llvm::Value* srem_is_unsafe = + b_->CreateOr(has_int_min_overflow, has_zero_divisor); + llvm::Value* safe_rhs = Select(srem_is_unsafe, GetOne(lhs->getType()), rhs); + llvm::Value* safe_rem = b_->CreateSRem(lhs, safe_rhs); + + return Select( + has_zero_divisor, lhs, + Select(has_int_min_overflow, GetZero(lhs->getType()), safe_rem)); +} + StatusOr ElementalIrEmitter::EmitIntegerBinaryOp( const HloInstruction* op, llvm::Value* lhs_value, llvm::Value* rhs_value, bool is_signed) const { @@ -1112,11 +1201,9 @@ StatusOr ElementalIrEmitter::EmitIntegerBinaryOp( case HloOpcode::kMultiply: return b_->CreateMul(lhs_value, rhs_value); case HloOpcode::kDivide: - return is_signed ? b_->CreateSDiv(lhs_value, rhs_value) - : b_->CreateUDiv(lhs_value, rhs_value); + return EmitIntegerDivide(lhs_value, rhs_value, is_signed); case HloOpcode::kRemainder: - return is_signed ? 
b_->CreateSRem(lhs_value, rhs_value) - : b_->CreateURem(lhs_value, rhs_value); + return EmitIntegerRemainder(lhs_value, rhs_value, is_signed); case HloOpcode::kEq: return llvm_ir::EmitComparison(llvm::CmpInst::ICMP_EQ, lhs_value, rhs_value, b_); diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h index 1598a4dd85..c037b98929 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h @@ -65,6 +65,21 @@ class ElementalIrEmitter { virtual StatusOr EmitComplexUnaryOp( const HloInstruction* op, llvm::Value* operand_value) const; + llvm::Value* IsZero(llvm::Value* v) const; + llvm::Value* IsIntMinDivisionOverflow(llvm::Value* lhs, + llvm::Value* rhs) const; + llvm::Value* GetZero(llvm::Type* type) const; + llvm::Value* GetOne(llvm::Type* type) const; + llvm::Value* GetIntSMin(llvm::Type* type) const; + llvm::Value* GetMinusOne(llvm::Type* type) const; + llvm::Value* Select(llvm::Value* cond, llvm::Value* if_true, + llvm::Value* if_false) const; + + llvm::Value* EmitIntegerDivide(llvm::Value* lhs, llvm::Value* rhs, + bool is_signed) const; + llvm::Value* EmitIntegerRemainder(llvm::Value* lhs, llvm::Value* rhs, + bool is_signed) const; + virtual StatusOr EmitIntegerBinaryOp(const HloInstruction* op, llvm::Value* lhs_value, llvm::Value* rhs_value, diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 316ab26a1f..84c5b6e549 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -498,8 +498,7 @@ XLA_TEST_F(IntegerDivideOpTest, DivS32s) { TestDivRem(dividends, divisors, quotients, remainders); } -XLA_TEST_F(IntegerDivideOpTest, - DISABLED_ON_CPU(DISABLED_ON_GPU(SignedOverflow))) { +XLA_TEST_F(IntegerDivideOpTest, SignedOverflow) { std::vector dividends = {5, 
INT32_MIN}, divisors = {0, -1}, quotients = {-1, INT32_MIN}, remainders = {5, 0}; @@ -529,8 +528,7 @@ XLA_TEST_F(IntegerDivideOpTest, DivU32s) { TestDivRem(dividends, divisors, quotients, remainders); } -XLA_TEST_F(IntegerDivideOpTest, - DISABLED_ON_CPU(DISABLED_ON_GPU(UnsignedOverflow))) { +XLA_TEST_F(IntegerDivideOpTest, UnsignedOverflow) { std::vector dividends = {5}, divisors = {0}, quotients = {-1}, remainders = {5}; diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 2de30d1b3d..c23a7ad9e2 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -1036,6 +1036,10 @@ different ranks are *not* supported, unless one of the operands is a scalar. When `Op` is `Rem`, the sign of the result is taken from the dividend, and the absolute value of the result is always less than the divisor's absolute value. +Integer division overflow (signed/unsigned division/remainder by zero or signed +division/remainder of `INT_SMIN` with `-1`) produces an implementation defined +value. + An alternative variant with different-rank broadcasting support exists for these operations: -- GitLab From 401cb1a1f73364b49a1b46c16e95b17bc7f7786a Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 22 Aug 2018 13:50:14 -0700 Subject: [PATCH 077/380] Add assert_existing_objects_matched to object-based checkpoint restores Useful when loading only part of a larger checkpoint, to assert that the Variable objects which do exist in the current program are being restored.
PiperOrigin-RevId: 209823112 --- .../python/training/checkpointable/util.py | 61 +++++++++++++++++-- .../training/checkpointable/util_test.py | 24 +++++++- 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py index f49ed5c9ff..d1b50d1362 100644 --- a/tensorflow/python/training/checkpointable/util.py +++ b/tensorflow/python/training/checkpointable/util.py @@ -833,6 +833,11 @@ class _LoadStatus(object): """Raises an exception unless a non-trivial restoration has completed.""" pass + @abc.abstractmethod + def assert_existing_objects_matched(self): + """Raises an exception unless existing Python objects have been matched.""" + pass + @abc.abstractmethod def run_restore_ops(self, session=None): """Runs restore ops from the checkpoint. Requires a valid checkpoint.""" @@ -903,13 +908,11 @@ class CheckpointLoadStatus(_LoadStatus): or if there are any checkpointed values which have not been matched to Python objects. """ + self.assert_existing_objects_matched() for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes): checkpointable = self._checkpoint.object_by_proto_id.get(node_id, None) if checkpointable is None: raise AssertionError("Unresolved object in checkpoint: %s" % (node,)) - if checkpointable._update_uid < self._checkpoint.restore_uid: # pylint: disable=protected-access - raise AssertionError( - "Object not assigned a value from checkpoint: %s" % (node,)) if self._checkpoint.slot_restorations: # Sanity check; this collection should be clear if everything has been # restored. @@ -920,6 +923,31 @@ class CheckpointLoadStatus(_LoadStatus): ("Unused attributes in these objects (the attributes exist in the " "checkpoint but not in the objects): %s") % ( self._checkpoint.unused_attributes.items(),)) + return self + + def assert_existing_objects_matched(self): + """Asserts that checkpointable Python objects have been matched. 
+ + Note that this is a weaker assertion than `assert_consumed`. It will only + fail for existing Python objects which are (transitive) dependencies of the + root object and which do not have an entry in the checkpoint. + + It will not fail, for example, if a `tf.keras.Layer` object has not yet been + built and so has not created any `tf.Variable` objects. + + Returns: + `self` for chaining. + + Raises: + AssertionError: If a Python object exists in the transitive dependencies + of the root object but does not have a value in the checkpoint. + """ + for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes): + checkpointable = self._checkpoint.object_by_proto_id.get(node_id, None) + if (checkpointable is not None + and checkpointable._update_uid < self._checkpoint.restore_uid): # pylint: disable=protected-access + raise AssertionError( + "Object not assigned a value from checkpoint: %s" % (node,)) for checkpointable_object in list_objects(self._root_checkpointable): self._checkpoint.all_python_objects.add(checkpointable_object) unused_python_objects = ( @@ -929,7 +957,7 @@ class CheckpointLoadStatus(_LoadStatus): raise AssertionError( ("Some Python objects were not bound to checkpointed values, likely " "due to changes in the Python program: %s") - % (unused_python_objects,)) + % (list(unused_python_objects),)) return self def run_restore_ops(self, session=None): @@ -991,6 +1019,11 @@ class InitializationOnlyStatus(_LoadStatus): raise AssertionError( "No checkpoint specified (save_path=None); nothing is being restored.") + def assert_existing_objects_matched(self): + """Assertion for consistency with `CheckpointLoadStatus`. Always fails.""" + raise AssertionError( + "No checkpoint specified (save_path=None); nothing is being restored.") + def run_restore_ops(self, session=None): """For consistency with `CheckpointLoadStatus`. 
@@ -1064,6 +1097,15 @@ class NameBasedSaverStatus(_LoadStatus): if checkpointable._update_uid < self._checkpoint.restore_uid: raise AssertionError("Object not restored: %s" % (checkpointable,)) # pylint: enable=protected-access + return self + + def assert_existing_objects_matched(self): + """Raises an exception if currently created objects are unmatched.""" + # For name-based checkpoints there's no object information in the + # checkpoint, so there's no distinction between + # assert_existing_objects_matched and assert_consumed (and both are less + # useful since we don't touch Python objects or Python state). + return self.assert_consumed() def _gather_saveable_objects(self): """Walk the object graph, using global names for SaveableObjects.""" @@ -1647,6 +1689,17 @@ class Checkpoint(tracking.Checkpointable): Python objects in the dependency graph with no values in the checkpoint. This method returns the status object, and so may be chained with `initialize_or_restore` or `run_restore_ops`. + - `assert_existing_objects_matched()`: + Raises an exception if any existing Python objects in the dependency + graph are unmatched. Unlike `assert_consumed`, this assertion will + pass if values in the checkpoint have no corresponding Python + objects. For example a `tf.keras.Layer` object which has not yet been + built, and so has not created any variables, will pass this assertion + but fail `assert_consumed`. Useful when loading part of a larger + checkpoint into a new Python program, e.g. a training checkpoint with + a `tf.train.Optimizer` was saved but only the state required for + inference is being loaded. This method returns the status object, and + so may be chained with `initialize_or_restore` or `run_restore_ops`. - `initialize_or_restore(session=None)`: When graph building, runs variable initializers if `save_path` is `None`, but otherwise runs restore operations. 
If no `session` is diff --git a/tensorflow/python/training/checkpointable/util_test.py b/tensorflow/python/training/checkpointable/util_test.py index 522167b49c..ecadc56871 100644 --- a/tensorflow/python/training/checkpointable/util_test.py +++ b/tensorflow/python/training/checkpointable/util_test.py @@ -437,6 +437,9 @@ class CheckpointingTests(test.TestCase): optimizer=on_create_optimizer, model=on_create_model) # Deferred restoration status = on_create_root.restore(save_path=save_path) + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() on_create_model(constant_op.constant([[3.]])) # create variables self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) self.assertAllEqual([42.], @@ -444,6 +447,9 @@ class CheckpointingTests(test.TestCase): on_create_model._named_dense.variables[1])) on_create_m_bias_slot = on_create_optimizer.get_slot( on_create_model._named_dense.variables[1], "m") + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() # Optimizer slot variables are created when the original variable is # restored. 
self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) @@ -451,6 +457,7 @@ class CheckpointingTests(test.TestCase): self.evaluate(on_create_optimizer.variables())) dummy_var = resource_variable_ops.ResourceVariable([1.]) on_create_optimizer.minimize(loss=dummy_var.read_value) + status.assert_existing_objects_matched() status.assert_consumed() beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators() self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) @@ -506,8 +513,11 @@ class CheckpointingTests(test.TestCase): self.assertEqual(0, training_continuation) with self.assertRaises(AssertionError): status.assert_consumed() + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() else: status.assert_consumed() + status.assert_existing_objects_matched() for _ in range(num_training_steps): session.run(train_op) root.save(file_prefix=checkpoint_prefix, session=session) @@ -704,11 +714,12 @@ class CheckpointingTests(test.TestCase): load_into = LateDependencies() status = checkpointable_utils.CheckpointableSaver( load_into).restore(save_path) + status.assert_existing_objects_matched() with self.assertRaises(AssertionError): status.assert_consumed() load_into.add_dep() status.assert_consumed() - status.run_restore_ops() + status.assert_existing_objects_matched().run_restore_ops() self.assertEqual(123., self.evaluate(load_into.dep.var)) @test_util.run_in_graph_and_eager_modes @@ -785,6 +796,7 @@ class CheckpointingTests(test.TestCase): no_slot_status.run_restore_ops() self.assertEqual(12., self.evaluate(new_root.var)) new_root.optimizer = adam.AdamOptimizer(0.1) + slot_status.assert_existing_objects_matched() with self.assertRaisesRegexp(AssertionError, "beta1_power"): slot_status.assert_consumed() self.assertEqual(12., self.evaluate(new_root.var)) @@ -884,6 +896,8 @@ class CheckpointingTests(test.TestCase): load_root.dep_one.dep_three, name="var", initializer=0.) 
with self.assertRaises(AssertionError): status.assert_consumed() + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() @test_util.run_in_graph_and_eager_modes def testObjectsCombined(self): @@ -907,7 +921,7 @@ class CheckpointingTests(test.TestCase): v2 = checkpointable_utils.add_variable( load_root.dep_one, name="var2", shape=[], dtype=dtypes.float64) status = checkpointable_utils.CheckpointableSaver(load_root).restore( - save_path).assert_consumed() + save_path).assert_consumed().assert_existing_objects_matched() status.run_restore_ops() self.assertEqual(32., self.evaluate(v1)) self.assertEqual(64., self.evaluate(v2)) @@ -1238,6 +1252,8 @@ class CheckpointingTests(test.TestCase): train_fn = functools.partial(self.evaluate, train_fn()) status.initialize_or_restore() train_fn() + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() with self.assertRaises(AssertionError): status.assert_consumed() @@ -1451,11 +1467,15 @@ class CheckpointCompatibilityTests(test.TestCase): if context.executing_eagerly(): with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"): status.assert_consumed() + with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"): + status.assert_existing_objects_matched() else: # When graph building, we haven't read any keys, so we don't know # whether the restore will be complete. with self.assertRaisesRegexp(AssertionError, "not restored"): status.assert_consumed() + with self.assertRaisesRegexp(AssertionError, "not restored"): + status.assert_existing_objects_matched() status.run_restore_ops() self._check_sentinels(root) self._set_sentinels(root) -- GitLab From 7acfb875a0217777287a299ea8013e16fca59d4e Mon Sep 17 00:00:00 2001 From: Zafarali Ahmed Date: Wed, 22 Aug 2018 13:51:47 -0700 Subject: [PATCH 078/380] Add copy and deepcopy functionality for resource_variable_ops.ResourceVariable. 
PiperOrigin-RevId: 209823375 --- tensorflow/python/keras/models_test.py | 48 +++++++++++++++++++ .../resource_variable_ops_test.py | 22 +++++++++ .../python/ops/resource_variable_ops.py | 16 +++++++ 3 files changed, 86 insertions(+) diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py index 5f755f7b5e..1d0f56f3c8 100644 --- a/tensorflow/python/keras/models_test.py +++ b/tensorflow/python/keras/models_test.py @@ -18,18 +18,36 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import copy import os import numpy as np from tensorflow.python import keras +from tensorflow.python.eager import context from tensorflow.python.framework import test_util from tensorflow.python.keras import metrics from tensorflow.python.keras import models +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test from tensorflow.python.training import adam +class TestModel(keras.Model): + """A model subclass.""" + + def __init__(self, n_outputs=4, trainable=True): + """A test class with one dense layer and number of outputs as a variable.""" + super(TestModel, self).__init__() + self.layer1 = keras.layers.Dense(n_outputs) + self.n_outputs = resource_variable_ops.ResourceVariable( + n_outputs, trainable=trainable) + + def call(self, x): + return self.layer1(x) + + class TestModelCloning(test.TestCase): def test_clone_sequential_model(self): @@ -187,6 +205,36 @@ class TestModelBackend(test.TestCase): keras.backend.set_floatx(floatx) +class TestModelDeepCopy(test.TestCase): + + def test_deep_copy_eager_mode_trainable(self): + with context.eager_mode(): + x = random_ops.random_normal((32, 4)) + model = TestModel(trainable=True) + model(x) # Initialize Variables. 
+ model_copy = copy.deepcopy(model) + self.assertEqual(len(model_copy.trainable_variables), 3) + model_copy.n_outputs.assign(1200) + self.assertFalse( + np.allclose(model_copy.n_outputs.numpy(), + model.n_outputs.numpy())) + + def test_deep_copy_eager_mode_not_trainable(self): + with context.eager_mode(): + x = random_ops.random_normal((32, 4)) + model = TestModel(trainable=False) + model(x) + model_copy = copy.deepcopy(model) + self.assertEqual(len(model_copy.trainable_variables), 2) + + weights = model_copy.get_weights() + weights = [w * 4 for w in weights] + model_copy.set_weights(weights) + self.assertFalse( + np.allclose(model.get_weights()[0], + model_copy.get_weights()[0])) + + class TestCloneAndBuildModel(test.TestCase): def test_clone_and_build_non_compiled_model(self): diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index f815348b2a..d31aba766b 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import copy import gc import numpy as np @@ -106,6 +107,27 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): v = resource_variable_ops.ResourceVariable(False, name="bool_test") self.assertAllEqual(bool(v), False) + def testEagerDeepCopy(self): + with context.eager_mode(): + init_value = np.ones((4, 4, 4)) + variable = resource_variable_ops.ResourceVariable(init_value, + name="init") + + copied_variable = copy.deepcopy(variable) + copied_variable.assign(4 * np.ones((4, 4, 4))) + + # Copying the variable should create a new underlying tensor with distinct + # values. 
+ self.assertFalse(np.allclose(variable.numpy(), copied_variable.numpy())) + + def testGraphDeepCopy(self): + with self.test_session(): + init_value = np.ones((4, 4, 4)) + variable = resource_variable_ops.ResourceVariable(init_value, + name="init") + with self.assertRaises(NotImplementedError): + copy.deepcopy(variable) + @test_util.run_in_graph_and_eager_modes def testStridedSliceAssign(self): v = resource_variable_ops.ResourceVariable([1.0, 2.0]) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index c5bc74132e..5c25cba4f2 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -586,6 +586,22 @@ class ResourceVariable(variables.RefVariable): def __bool__(self): return bool(self.read_value()) + def __copy__(self): + return self + + def __deepcopy__(self, memo): + if not context.executing_eagerly(): + raise NotImplementedError( + "__deepcopy__() is only available when eager execution is enabled.") + copied_variable = ResourceVariable( + initial_value=self.read_value(), + trainable=self._trainable, + constraint=self._constraint, + dtype=self._dtype, + name=self._shared_name + "_copy") + memo[self._unique_id] = copied_variable + return copied_variable + @property def dtype(self): """The dtype of this variable.""" -- GitLab From 21cdc5b13e3eeb873f92648f229ad29b3b7b1129 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Aug 2018 13:56:55 -0700 Subject: [PATCH 079/380] Making the side effect of result() function of tfe.Metrics.Mean optional. 
PiperOrigin-RevId: 209824328 --- .../contrib/eager/python/metrics_impl.py | 22 +++++++++++++++++-- .../contrib/eager/python/metrics_test.py | 22 +++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index 6efafccd6b..930e62b680 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -336,9 +336,27 @@ class Mean(Metric): return values return values, weights - def result(self): + def result(self, write_summary=True): + """Returns the result of the Metric. + + Args: + write_summary: bool indicating whether to feed the result to the summary + before returning. + Returns: + aggregated metric as float. + Raises: + ValueError: if the optional argument is not bool + """ + # Convert the boolean to tensor for tf.cond, if it is not. + if not isinstance(write_summary, ops.Tensor): + write_summary = ops.convert_to_tensor(write_summary) t = self.numer / self.denom - summary_ops.scalar(name=self.name, tensor=t) + def write_summary_f(): + summary_ops.scalar(name=self.name, tensor=t) + return t + control_flow_ops.cond(write_summary, + write_summary_f, + lambda: t) return t diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py index 20d938d492..aa99616810 100644 --- a/tensorflow/contrib/eager/python/metrics_test.py +++ b/tensorflow/contrib/eager/python/metrics_test.py @@ -46,6 +46,18 @@ class MetricsTest(test.TestCase): self.assertEqual(dtypes.float64, m.dtype) self.assertEqual(dtypes.float64, m.result().dtype) + def testSummaryArg(self): + m = metrics.Mean() + m([1, 10, 100]) + m(1000) + m([10000.0, 100000.0]) + self.assertEqual(111111.0/6, m.result(write_summary=True).numpy()) + self.assertEqual(111111.0/6, m.result(write_summary=False).numpy()) + with self.assertRaises(ValueError): + m.result(write_summary=5) + with 
self.assertRaises(ValueError): + m.result(write_summary=[True]) + def testVariableCollections(self): with context.graph_mode(), ops.Graph().as_default(): m = metrics.Mean() @@ -93,6 +105,16 @@ class MetricsTest(test.TestCase): self.assertEqual(len(events), 2) self.assertEqual(events[1].summary.value[0].simple_value, 37.0) + # Get result without saving the summary. + logdir = tempfile.mkdtemp() + with summary_ops.create_file_writer( + logdir, max_queue=0, + name="t0").as_default(), summary_ops.always_record_summaries(): + m.result(write_summary=False) # Must not write a summary. + # events_from_logdir(_) asserts the directory exists. + events = summary_test_util.events_from_logdir(logdir) + self.assertEqual(len(events), 1) + def testWeightedMean(self): m = metrics.Mean() m([1, 100, 100000], weights=[1, 0.2, 0.3]) -- GitLab From 4ec83b54620a7c4cc654a1c256115d07d4987eb5 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Wed, 22 Aug 2018 14:11:13 -0700 Subject: [PATCH 080/380] Add the device (stack) to tfe.defun's cache key, and copy the device stack to FuncGraph. This change makes tfe.defun() responsible for respecting call-site device annotations, not PartitionedCallOp --- the latter doesn't have access to the original requested device, and it was overly-aggressive in stamping its assigned device onto unassigned function nodes. It's not obvious how we could make PartitionedCallOp faithfully respect call-site device annotations, since directives like tf.device(None) are lost by the time control reaches PartitionedCallOp. As such, this change makes GraphModeFunction.__call__() raise an error if it's called under a device context / stack that's different than the one that was active when the python function was traced.
PiperOrigin-RevId: 209827111 --- .../core/kernels/partitioned_function_ops.cc | 24 +---- tensorflow/python/eager/function.py | 46 +++++++-- tensorflow/python/eager/function_test.py | 94 ++++++++++++++----- tensorflow/python/framework/device.py | 38 ++++++-- .../kernel_tests/functional_ops_test.py | 21 +---- 5 files changed, 146 insertions(+), 77 deletions(-) diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index 8db78f9784..876a1704c7 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -98,8 +98,7 @@ class PartitionedCallOp : public AsyncOpKernel { done); auto graph = tensorflow::MakeUnique(fbody->graph->flib_def()); CopyGraph(*fbody->graph, graph.get()); - OP_REQUIRES_OK_ASYNC(ctx, PropagateInheritedDevices(graph.get(), args), - done); + OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done); DeviceSet device_set; for (auto d : lib->device_mgr()->ListDevices()) { @@ -163,15 +162,10 @@ class PartitionedCallOp : public AsyncOpKernel { std::vector> ArgAndRetAllocAttrs; - // Propagates device annotations from the outer graph to the function body. - // // Pins each arg that emits a `DT_RESOURCE` tensor to the device on which the // corresponding resource lives. This ensures that the Placer assigns ops that - // access these resources to the appropriate devices. Additionally, places - // nodes that are unadorned with device annotations onto PartitiondCallOp's - // device. This lets call-site device annotations influence the execution - // of the function. - Status PropagateInheritedDevices(Graph* graph, const OpInputList& args) { + // access these resources to the appropriate devices. 
+ Status PinResourceArgs(Graph* graph, const OpInputList& args) { for (Node* node : graph->op_nodes()) { string node_type = node->type_string(); if (node_type == FunctionLibraryDefinition::kArgOp) { @@ -184,18 +178,6 @@ class PartitionedCallOp : public AsyncOpKernel { ResourceHandle handle = args[index].flat()(0); node->set_assigned_device_name(handle.device()); } - } else if (node_type != FunctionLibraryDefinition::kRetOp) { - // All non-RetVal nodes that weren't explicitly placed by the user - // inherit PartitionedCallOp's device. RetVal placement is inferred by - // the placer, to avoid forcing the function's outputs through a single - // device. - // - // TODO(b/112166045): Plumb the original requested device into this - // OpKernel (this->requested_device() isn't reliable), and merge it - // with node->requested_device() if possible. - if (node->requested_device().empty()) { - node->set_requested_device(local_device_name_); - } } } return Status::OK(); diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index f6b1b34856..dba9779488 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -33,6 +33,7 @@ from tensorflow.python.eager import execute from tensorflow.python.eager import tape from tensorflow.python.eager.graph_only_ops import graph_placeholder from tensorflow.python.framework import c_api_util +from tensorflow.python.framework import device as pydev from tensorflow.python.framework import dtypes as dtypes_module from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_spec @@ -179,6 +180,14 @@ class CapturingGraph(ops.Graph): compute_device=compute_device) +def _get_device_functions(ctx, graph): + """Returns a tuple of device functions representing the device stack.""" + if ctx.executing_eagerly(): + return (pydev.merge_device(ctx.device_name),) + else: + return tuple(graph._device_functions_outer_to_inner) # pylint: disable=protected-access + + 
class FuncGraph(CapturingGraph): """Graph representing a function body. @@ -200,8 +209,8 @@ class FuncGraph(CapturingGraph): def __init__(self, name): """Construct a new FuncGraph. - The graph will inherit its graph key, collections, seed, and distribution - strategy stack from the current context or graph. + The graph will inherit its graph key, collections, seed, device stack, and + distribution strategy stack from the current context or graph. Args: name: the name of the function. @@ -219,9 +228,11 @@ class FuncGraph(CapturingGraph): if context.executing_eagerly(): self.seed = context.global_seed() self._xla_compile = (context.context().device_spec.device_type == "TPU") + self._add_device_to_stack(context.context().device_name) else: self.seed = graph.seed self._xla_compile = getattr(graph, "_xla_compile", False) + self._device_function_stack = graph._device_function_stack.copy() # pylint: disable=protected-access # TODO(b/112165328, b/112906995): summaries depend on inheriting collections # from the default graph even in eager mode. 
It'd be nice to not have a @@ -449,6 +460,8 @@ class GraphCallable(object): self._output_shapes = tuple( output.shape for output in self._func_graph.outputs) self._attrs = attrs or {} + self._device_functions = tuple( + self._func_graph._device_functions_outer_to_inner) # pylint: disable=protected-access self._inference_function = _EagerDefinedFunction( _inference_name(self._func_graph.name), self._func_graph, @@ -602,7 +615,17 @@ class GraphCallable(object): return self._captured_inputs def __call__(self, *args): - """Executes the passed function in eager mode.""" + """Executes the wrapped function.""" + ctx = context.context() + device_functions = _get_device_functions(ctx, ops.get_default_graph()) + if device_functions != self._device_functions: + raise ValueError( + "The current device stack does not match the device stack under " + "which the TensorFlow function '%s' was created.\n" + "Current device stack: %s\n%s device stack: %s" % + (self._inference_function.name, device_functions, + self._inference_function.name, self._device_functions)) + for v in self._func_graph.variables: if v.trainable: tape.watch_variable(v) @@ -614,7 +637,6 @@ class GraphCallable(object): if tape.should_record(tensor_inputs) or tape.should_record(captures): return self._backprop_call(args) - ctx = context.context() outputs = self._inference_function.call(ctx, args) return self._build_call_outputs(outputs) @@ -931,17 +953,24 @@ class _PolymorphicFunction(object): # then `instance` will be `foo` (and `owner` will be `Foo`). 
return functools.partial(self.__call__, instance) - def _cache_key(self, args, kwds): - """Computes the cache key given inputs.""" + def _cache_key(self, args, kwds, ctx, graph): + """Computes the cache key given inputs and execution context.""" if self._input_signature is None: inputs = (args, kwds) if kwds else args cache_key = tuple(_encode_arg(arg) for arg in inputs) else: del args, kwds cache_key = self._flat_input_signature + # The graph, or whether we're executing eagerly, should be a part of the # cache key so we don't improperly capture tensors such as variables. - return cache_key + (context.executing_eagerly() or ops.get_default_graph(),) + execution_context = ctx.executing_eagerly() or graph + + # Putting the device in the cache key ensures that call-site device + # annotations are respected. + device_functions = _get_device_functions(ctx, graph) + + return cache_key + (execution_context, device_functions) def _canonicalize_function_inputs(self, *args, **kwds): """Canonicalizes `args` and `kwds`. 
@@ -1029,7 +1058,8 @@ class _PolymorphicFunction(object): """ args, kwds = self._canonicalize_function_inputs(*args, **kwds) - cache_key = self._cache_key(args, kwds) + cache_key = self._cache_key(args, kwds, context.context(), + ops.get_default_graph()) with self._lock: try: graph_function = self._arguments_to_functions.get(cache_key, None) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index ca6aafd715..3e9bb91d54 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -978,39 +978,86 @@ class FunctionTest(test.TestCase): config=config_pb2.ConfigProto(device_count={'CPU': 4})) def testDeviceAnnotationsRespected(self): - @function.defun def multi_device_fn(): with ops.device('/cpu:0'): - s1 = iterator_ops.Iterator.from_structure( + s0 = iterator_ops.Iterator.from_structure( (dtypes.float32,)).string_handle() with ops.device('/cpu:1'): - s2 = iterator_ops.Iterator.from_structure( + s1 = iterator_ops.Iterator.from_structure( (dtypes.float32,)).string_handle() with ops.device('/cpu:2'): - s3 = iterator_ops.Iterator.from_structure( - (dtypes.float32,)).string_handle() - with ops.device(''): - # TODO(akshayka): This is unfortunate and brittle. It prevents - # `Iterator.from_structure` from assigning the iterator op to 'cpu:0'. - # Remove this hack once we have a way of obtaining metadata about - # function execution. 
- s4 = iterator_ops.Iterator.from_structure( + s2 = iterator_ops.Iterator.from_structure( (dtypes.float32,)).string_handle() - return s1, s2, s3, s4 + s3 = iterator_ops.Iterator.from_structure( + (dtypes.float32,)).string_handle() + return s0, s1, s2, s3 - with ops.device('/cpu:3'): - outputs = self.evaluate(multi_device_fn()) + defined = function.defun(multi_device_fn) + outputs = self.evaluate(defined()) + self.assertEqual(len(defined._arguments_to_functions), 1) self.assertIn(compat.as_bytes('CPU:0'), outputs[0]) self.assertIn(compat.as_bytes('CPU:1'), outputs[1]) self.assertIn(compat.as_bytes('CPU:2'), outputs[2]) - self.assertIn(compat.as_bytes('CPU:3'), outputs[3]) - with ops.device('/cpu:0'): - outputs = self.evaluate(multi_device_fn()) + with ops.device('/cpu:3'): + outputs = self.evaluate(defined()) + self.assertEqual(len(defined._arguments_to_functions), 2) self.assertIn(compat.as_bytes('CPU:0'), outputs[0]) self.assertIn(compat.as_bytes('CPU:1'), outputs[1]) self.assertIn(compat.as_bytes('CPU:2'), outputs[2]) - self.assertIn(compat.as_bytes('CPU:0'), outputs[3]) + self.assertIn(compat.as_bytes('CPU:3'), outputs[3]) + + # This should retrieve the call-site-device agnostic function + defined() + self.assertEqual(len(defined._arguments_to_functions), 2) + + # And this should retrieve the function created for '/cpu:3' + with ops.device('/cpu:3'): + defined() + self.assertEqual(len(defined._arguments_to_functions), 2) + + @test_util.run_in_graph_and_eager_modes( + config=config_pb2.ConfigProto(device_count={'CPU': 2})) + def testCallingGraphFunctionOnIncompatibleDeviceRaisesError(self): + + def func(): + return constant_op.constant(0) + + with ops.device('cpu:0'): + cpu_graph_function = function.make_defun_op(func) + + with ops.device('cpu:0'): + self.assertEqual( + self.evaluate(cpu_graph_function()), self.evaluate(func())) + + with self.assertRaisesRegexp( + ValueError, + 'The current device stack does not match the device stack under ' + 'which the 
TensorFlow function \'.*func.*\' was created.\n' + 'Current device stack: .*\n.*func.* device stack.*'): + with ops.device('cpu:1'): + cpu_graph_function() + + with self.assertRaisesRegexp( + ValueError, + 'The current device stack does not match the device stack under ' + 'which the TensorFlow function \'.*func.*\' was created.\n' + 'Current device stack: .*\n.*func.* device stack.*'): + with ops.device(None): + cpu_graph_function() + + default_graph_function = function.make_defun_op(func) + + self.assertEqual( + self.evaluate(default_graph_function()), self.evaluate(func())) + + with self.assertRaisesRegexp( + ValueError, + 'The current device stack does not match the device stack under ' + 'which the TensorFlow function \'.*func.*\' was created.\n' + 'Current device stack: .*\n.*func.* device stack.*'): + with ops.device('cpu:1'): + default_graph_function() def testVariablesAreTracked(self): v = resource_variable_ops.ResourceVariable(1.0) @@ -1039,18 +1086,23 @@ class FunctionTest(test.TestCase): defined = function.defun(func) defined(0, baz=20) + + def cache_keys(): + """Sanitizes cache keys of non-input metadata.""" + return tuple(key[:3] for key in defined._arguments_to_functions) + # `True` corresponds to the fact that we're executing eagerly - self.assertIn((0, 1, 20, True), defined._arguments_to_functions) + self.assertIn((0, 1, 20), cache_keys()) defined(1) # bar=1, baz=2 - self.assertIn((1, 1, 2, True), defined._arguments_to_functions) + self.assertIn((1, 1, 2), cache_keys()) # This matches the previous call. defined(foo=1) self.assertEqual(len(defined._arguments_to_functions), 2) defined(1, 2, 3) - self.assertIn((1, 2, 3, True), defined._arguments_to_functions) + self.assertIn((1, 2, 3), cache_keys()) # This matches the previous call. 
defined(1, bar=2, baz=3) diff --git a/tensorflow/python/framework/device.py b/tensorflow/python/framework/device.py index ab06a2babf..06c653097a 100644 --- a/tensorflow/python/framework/device.py +++ b/tensorflow/python/framework/device.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import copy +import threading from tensorflow.python.util.tf_export import tf_export @@ -229,6 +230,12 @@ class DeviceSpec(object): """ return DeviceSpec().parse_from_string(spec) + def __eq__(self, other): + return self.to_string() == other.to_string() + + def __hash__(self): + return hash(self.to_string()) + def check_valid(spec): """Check that a device spec is valid. @@ -254,6 +261,14 @@ def canonical_name(device): return device.to_string() +# Cache from DeviceSpec objects to their corresponding device functions. +# This cache is maintained for correctness, not performance: it makes it +# possible to compare the device function stacks belonging to different +# graphs in a meaningful way. +_cached_device_functions = {} +_cache_lock = threading.Lock() + + def merge_device(spec): """Returns a device function that merges devices specifications. @@ -280,11 +295,18 @@ def merge_device(spec): Raises: ValueError: if the spec was not valid. """ - if not isinstance(spec, DeviceSpec): - spec = DeviceSpec.from_string(spec or "") - def _device_function(node_def): - current_device = DeviceSpec.from_string(node_def.device or "") - copy_spec = copy.copy(spec) - copy_spec.merge_from(current_device) # current_device takes precedence. 
- return copy_spec - return _device_function + with _cache_lock: + if not isinstance(spec, DeviceSpec): + spec = DeviceSpec.from_string(spec or "") + cached_function = _cached_device_functions.get(spec, None) + if cached_function is not None: + return cached_function + + def _device_function(node_def): + current_device = DeviceSpec.from_string(node_def.device or "") + copy_spec = copy.copy(spec) + copy_spec.merge_from(current_device) # current_device takes precedence. + return copy_spec + + _cached_device_functions[spec] = _device_function + return _device_function diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py index 5db2e9821d..1e76ad7476 100644 --- a/tensorflow/python/kernel_tests/functional_ops_test.py +++ b/tensorflow/python/kernel_tests/functional_ops_test.py @@ -1075,30 +1075,13 @@ class PartitionedCallTest(test.TestCase): with ops.device("/cpu:2"): s3 = iterator_ops.Iterator.from_structure( (dtypes.float32,)).string_handle() - with ops.device(""): - # TODO(akshayka): This is unfortunate and brittle. It prevents - # `Iterator.from_structure` from assigning the iterator op to 'cpu:0'. - # Remove this hack once we have a way of obtaining metadata about - # function execution. 
- s4 = iterator_ops.Iterator.from_structure( - (dtypes.float32,)).string_handle() - return s1, s2, s3, s4 + return s1, s2, s3 with self.test_session(config=config, use_gpu=True) as sess: - with ops.device("/cpu:3"): - outputs = sess.run(functional_ops.partitioned_call(args=[], f=Body)) - self.assertIn(compat.as_bytes("CPU:0"), outputs[0]) - self.assertIn(compat.as_bytes("CPU:1"), outputs[1]) - self.assertIn(compat.as_bytes("CPU:2"), outputs[2]) - self.assertIn(compat.as_bytes("CPU:3"), outputs[3]) - - with self.test_session(config=config, use_gpu=True): - with ops.device("/cpu:0"): - outputs = sess.run(functional_ops.partitioned_call(args=[], f=Body)) + outputs = sess.run(functional_ops.partitioned_call(args=[], f=Body)) self.assertIn(compat.as_bytes("CPU:0"), outputs[0]) self.assertIn(compat.as_bytes("CPU:1"), outputs[1]) self.assertIn(compat.as_bytes("CPU:2"), outputs[2]) - self.assertIn(compat.as_bytes("CPU:0"), outputs[3]) def testAssignAddResourceVariable(self): -- GitLab From 77aa0e03921aa6a3805085e72c9afcc3a2e86834 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Aug 2018 14:17:40 -0700 Subject: [PATCH 081/380] Create an UnsupervisedInputReceiver for unsupervised problems. PiperOrigin-RevId: 209828317 --- tensorflow/python/estimator/export/export.py | 23 +++++++++++++++++++ .../python/estimator/export/export_test.py | 23 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py index 3d171f7811..7723fcce74 100644 --- a/tensorflow/python/estimator/export/export.py +++ b/tensorflow/python/estimator/export/export.py @@ -217,6 +217,29 @@ class TensorServingInputReceiver( receiver_tensors_alternatives=receiver.receiver_tensors_alternatives) +class UnsupervisedInputReceiver(ServingInputReceiver): + """A return type for a training_input_receiver_fn or eval_input_receiver_fn. 
+ + This differs from SupervisedInputReceiver in that it does not require a set + of labels. + + The expected return values are: + features: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` or + `SparseTensor`, specifying the features to be passed to the model. + receiver_tensors: A `Tensor`, `SparseTensor`, or dict of string to `Tensor` + or `SparseTensor`, specifying input nodes where this receiver expects to + be fed by default. Typically, this is a single placeholder expecting + serialized `tf.Example` protos. + """ + + def __new__(cls, features, receiver_tensors): + return super(UnsupervisedInputReceiver, cls).__new__( + cls, + features=features, + receiver_tensors=receiver_tensors, + receiver_tensors_alternatives=None) + + class SupervisedInputReceiver( collections.namedtuple('SupervisedInputReceiver', ['features', 'labels', 'receiver_tensors'])): diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py index 1d475adb43..e87b88327f 100644 --- a/tensorflow/python/estimator/export/export_test.py +++ b/tensorflow/python/estimator/export/export_test.py @@ -163,6 +163,29 @@ class ServingInputReceiverTest(test_util.TensorFlowTestCase): _ = export.ServingInputReceiver(feature, receiver_tensor) +class UnsupervisedInputReceiverTest(test_util.TensorFlowTestCase): + + # Since this is basically a wrapper around ServingInputReceiver, we only + # have a simple sanity check to ensure that it works. 
+ + def test_unsupervised_input_receiver_constructor(self): + """Tests that no errors are raised when input is expected.""" + features = { + "feature0": + constant_op.constant([0]), + u"feature1": + constant_op.constant([1]), + "feature2": + sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[1, 1]), + } + receiver_tensors = { + "example0": array_ops.placeholder(dtypes.string, name="example0"), + u"example1": array_ops.placeholder(dtypes.string, name="example1"), + } + export.UnsupervisedInputReceiver(features, receiver_tensors) + + class SupervisedInputReceiverTest(test_util.TensorFlowTestCase): def test_input_receiver_constructor(self): -- GitLab From c21e14a13387ce253f243320c156e9f71556bd21 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 22 Aug 2018 14:18:59 -0700 Subject: [PATCH 082/380] name and read_value arguments to tf.Variable.assign* PiperOrigin-RevId: 209828610 --- tensorflow/python/ops/variables.py | 48 +++++++++++++++---- .../api/golden/v1/tensorflow.-variable.pbtxt | 6 +-- .../api/golden/v2/tensorflow.-variable.pbtxt | 6 +-- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index c4eff6c57b..571265665b 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -459,7 +459,7 @@ class Variable(six.with_metaclass(VariableMetaclass, """ raise NotImplementedError - def assign(self, value, use_locking=False): + def assign(self, value, use_locking=False, name=None, read_value=True): """Assigns a new value to the variable. This is essentially a shortcut for `assign(self, value)`. @@ -467,6 +467,9 @@ class Variable(six.with_metaclass(VariableMetaclass, Args: value: A `Tensor`. The new value for this variable. use_locking: If `True`, use locking during the assignment. 
+ name: The name of the operation to be created + read_value: if True, will return something which evaluates to the + new value of the variable; if False will return the assign op. Returns: A `Tensor` that will hold the new value of this variable after @@ -474,7 +477,7 @@ class Variable(six.with_metaclass(VariableMetaclass, """ raise NotImplementedError - def assign_add(self, delta, use_locking=False): + def assign_add(self, delta, use_locking=False, name=None, read_value=True): """Adds a value to this variable. This is essentially a shortcut for `assign_add(self, delta)`. @@ -482,6 +485,9 @@ class Variable(six.with_metaclass(VariableMetaclass, Args: delta: A `Tensor`. The value to add to this variable. use_locking: If `True`, use locking during the operation. + name: The name of the operation to be created + read_value: if True, will return something which evaluates to the + new value of the variable; if False will return the assign op. Returns: A `Tensor` that will hold the new value of this variable after @@ -489,7 +495,7 @@ class Variable(six.with_metaclass(VariableMetaclass, """ raise NotImplementedError - def assign_sub(self, delta, use_locking=False): + def assign_sub(self, delta, use_locking=False, name=None, read_value=True): """Subtracts a value from this variable. This is essentially a shortcut for `assign_sub(self, delta)`. @@ -497,6 +503,9 @@ class Variable(six.with_metaclass(VariableMetaclass, Args: delta: A `Tensor`. The value to subtract from this variable. use_locking: If `True`, use locking during the operation. + name: The name of the operation to be created + read_value: if True, will return something which evaluates to the + new value of the variable; if False will return the assign op. 
Returns: A `Tensor` that will hold the new value of this variable after @@ -1450,7 +1459,7 @@ class RefVariable(Variable): """ return self._constraint - def assign(self, value, use_locking=False): + def assign(self, value, use_locking=False, name=None, read_value=True): """Assigns a new value to the variable. This is essentially a shortcut for `assign(self, value)`. @@ -1458,14 +1467,21 @@ class RefVariable(Variable): Args: value: A `Tensor`. The new value for this variable. use_locking: If `True`, use locking during the assignment. + name: The name of the operation to be created + read_value: if True, will return something which evaluates to the + new value of the variable; if False will return the assign op. Returns: A `Tensor` that will hold the new value of this variable after the assignment has completed. """ - return state_ops.assign(self._variable, value, use_locking=use_locking) + assign = state_ops.assign(self._variable, value, use_locking=use_locking, + name=name) + if read_value: + return assign + return assign.op - def assign_add(self, delta, use_locking=False): + def assign_add(self, delta, use_locking=False, name=None, read_value=True): """Adds a value to this variable. This is essentially a shortcut for `assign_add(self, delta)`. @@ -1473,14 +1489,21 @@ class RefVariable(Variable): Args: delta: A `Tensor`. The value to add to this variable. use_locking: If `True`, use locking during the operation. + name: The name of the operation to be created + read_value: if True, will return something which evaluates to the + new value of the variable; if False will return the assign op. Returns: A `Tensor` that will hold the new value of this variable after the addition has completed. 
""" - return state_ops.assign_add(self._variable, delta, use_locking=use_locking) + assign = state_ops.assign_add( + self._variable, delta, use_locking=use_locking, name=name) + if read_value: + return assign + return assign.op - def assign_sub(self, delta, use_locking=False): + def assign_sub(self, delta, use_locking=False, name=None, read_value=True): """Subtracts a value from this variable. This is essentially a shortcut for `assign_sub(self, delta)`. @@ -1488,12 +1511,19 @@ class RefVariable(Variable): Args: delta: A `Tensor`. The value to subtract from this variable. use_locking: If `True`, use locking during the operation. + name: The name of the operation to be created + read_value: if True, will return something which evaluates to the + new value of the variable; if False will return the assign op. Returns: A `Tensor` that will hold the new value of this variable after the subtraction has completed. """ - return state_ops.assign_sub(self._variable, delta, use_locking=use_locking) + assign = state_ops.assign_sub( + self._variable, delta, use_locking=use_locking, name=name) + if read_value: + return assign + return assign.op def scatter_sub(self, sparse_delta, use_locking=False, name=None): """Subtracts `IndexedSlices` from this variable. 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt index f6099dd17a..05698b03ee 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt @@ -53,15 +53,15 @@ tf_class { } member_method { name: "assign" - argspec: "args=[\'self\', \'value\', \'use_locking\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'value\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], " } member_method { name: "assign_add" - argspec: "args=[\'self\', \'delta\', \'use_locking\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], " } member_method { name: "assign_sub" - argspec: "args=[\'self\', \'delta\', \'use_locking\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], " } member_method { name: "count_up_to" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt index f6099dd17a..05698b03ee 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt @@ -53,15 +53,15 @@ tf_class { } member_method { name: "assign" - argspec: "args=[\'self\', \'value\', \'use_locking\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'value\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], " } member_method { name: "assign_add" - argspec: "args=[\'self\', \'delta\', \'use_locking\'], 
varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], " } member_method { name: "assign_sub" - argspec: "args=[\'self\', \'delta\', \'use_locking\'], varargs=None, keywords=None, defaults=[\'False\'], " + argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], " } member_method { name: "count_up_to" -- GitLab From 5022fc95aa9e958c98439215654b1efd352308ad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Aug 2018 14:19:32 -0700 Subject: [PATCH 083/380] Internal change. PiperOrigin-RevId: 209828735 --- .../lite/kernels/internal/kernel_utils.cc | 615 ++++++++++++------ .../lite/kernels/internal/kernel_utils.h | 65 ++ 2 files changed, 470 insertions(+), 210 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc index 200f2f1515..88a0622286 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc @@ -127,6 +127,47 @@ void LstmStep( float* cell_state_ptr, float* input_gate_scratch, float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, float* output_ptr_batch) { + LstmStepWithAuxInput( + input_ptr_batch, input_to_input_weights_ptr, input_to_forget_weights_ptr, + input_to_cell_weights_ptr, input_to_output_weights_ptr, + /*aux_input_ptr_batch=*/nullptr, + /*aux_input_to_input_weights_ptr=*/nullptr, + /*aux_input_to_forget_weights_ptr=*/nullptr, + /*aux_input_to_cell_weights_ptr=*/nullptr, + /*aux_input_to_output_weights_ptr=*/nullptr, + recurrent_to_input_weights_ptr, recurrent_to_forget_weights_ptr, + recurrent_to_cell_weights_ptr, recurrent_to_output_weights_ptr, + cell_to_input_weights_ptr, cell_to_forget_weights_ptr, + 
cell_to_output_weights_ptr, input_gate_bias_ptr, forget_gate_bias_ptr, + cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr, + projection_bias_ptr, params, n_batch, n_cell, n_input, n_output, + output_state_ptr, cell_state_ptr, input_gate_scratch, forget_gate_scratch, + cell_scratch, output_gate_scratch, output_ptr_batch); +} + +void LstmStepWithAuxInput( + const float* input_ptr_batch, const float* input_to_input_weights_ptr, + const float* input_to_forget_weights_ptr, + const float* input_to_cell_weights_ptr, + const float* input_to_output_weights_ptr, const float* aux_input_ptr_batch, + const float* aux_input_to_input_weights_ptr, + const float* aux_input_to_forget_weights_ptr, + const float* aux_input_to_cell_weights_ptr, + const float* aux_input_to_output_weights_ptr, + const float* recurrent_to_input_weights_ptr, + const float* recurrent_to_forget_weights_ptr, + const float* recurrent_to_cell_weights_ptr, + const float* recurrent_to_output_weights_ptr, + const float* cell_to_input_weights_ptr, + const float* cell_to_forget_weights_ptr, + const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr, + const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const float* projection_weights_ptr, + const float* projection_bias_ptr, const TfLiteLSTMParams* params, + int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr, + float* cell_state_ptr, float* input_gate_scratch, + float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, + float* output_ptr_batch) { // Since we have already checked that weights are all there or none, we can // check the existense of only one to the get the condition. 
const bool use_cifg = (input_to_input_weights_ptr == nullptr); @@ -160,6 +201,25 @@ void LstmStep( input_to_output_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, output_gate_scratch, /*result_stride=*/1); + // If auxiliary input is available then compute aux_input_weight * aux_input + if (aux_input_ptr_batch != nullptr) { + if (!use_cifg) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_to_input_weights_ptr, n_cell, n_input, aux_input_ptr_batch, + n_batch, input_gate_scratch, /*result_stride=*/1); + } + + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_to_forget_weights_ptr, n_cell, n_input, aux_input_ptr_batch, + n_batch, forget_gate_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_to_cell_weights_ptr, n_cell, n_input, aux_input_ptr_batch, + n_batch, cell_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_to_output_weights_ptr, n_cell, n_input, aux_input_ptr_batch, + n_batch, output_gate_scratch, /*result_stride=*/1); + } + // For each batch and cell: compute recurrent_weight * output_state. if (!use_cifg) { tensor_utils::MatrixBatchVectorMultiplyAccumulate( @@ -286,227 +346,362 @@ void LstmStep( int8_t* quantized_input_ptr_batch, int8_t* quantized_output_state_ptr, int8_t* quantized_cell_state_ptr, float* output_state_ptr, float* cell_state_ptr, float* output_ptr_batch) { - // Since we have already checked that weights are all there or none, we can - // check the existense of only one to the get the condition. - const bool use_cifg = (input_to_input_weights_ptr == nullptr); - const bool use_peephole = (cell_to_output_weights_ptr != nullptr); - // Initialize scratch buffers with bias. 
- if (!use_cifg) { - tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch, - input_gate_scratch); - } - tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch, - forget_gate_scratch); - tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch, - cell_scratch); - tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch, - output_gate_scratch); - - if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) { - // Save quantization and matmul computation for all zero input. - float unused_min, unused_max; - for (int b = 0; b < n_batch; ++b) { - const int offset = b * n_input; - tensor_utils::SymmetricQuantizeFloats( - input_ptr_batch + offset, n_input, quantized_input_ptr_batch + offset, - &unused_min, &unused_max, &scaling_factors[b]); + LstmStepWithAuxInput( + input_ptr_batch, input_to_input_weights_ptr, input_to_input_weights_scale, + input_to_forget_weights_ptr, input_to_forget_weights_scale, + input_to_cell_weights_ptr, input_to_cell_weights_scale, + input_to_output_weights_ptr, input_to_output_weights_scale, + /*aux_input_ptr_batch=*/nullptr, + /*aux_input_to_input_weights_ptr=*/nullptr, + /*aux_input_to_input_weights_scale=*/0.0f, + /*aux_input_to_forget_weights_ptr=*/nullptr, + /*aux_input_to_forget_weights_scale=*/0.0f, + /*aux_input_to_cell_weights_ptr=*/nullptr, + /*aux_input_to_cell_weights_scale=*/0.0f, + /*aux_input_to_output_weights_ptr=*/nullptr, + /*aux_input_to_output_weights_scale=*/0.0f, + recurrent_to_input_weights_ptr, recurrent_to_input_weights_scale, + recurrent_to_forget_weights_ptr, recurrent_to_forget_weights_scale, + recurrent_to_cell_weights_ptr, recurrent_to_cell_weights_scale, + recurrent_to_output_weights_ptr, recurrent_to_output_weights_scale, + cell_to_input_weights_ptr, cell_to_input_weights_scale, + cell_to_forget_weights_ptr, cell_to_forget_weights_scale, + cell_to_output_weights_ptr, cell_to_output_weights_scale, + input_gate_bias_ptr, 
forget_gate_bias_ptr, cell_bias_ptr, + output_gate_bias_ptr, projection_weights_ptr, projection_weights_scale, + projection_bias_ptr, params, n_batch, n_cell, n_input, n_output, + input_gate_scratch, forget_gate_scratch, cell_scratch, + output_gate_scratch, scaling_factors, product_scaling_factors, + recovered_cell_weights, quantized_input_ptr_batch, + /*quantized_aux_input_ptr_batch=*/nullptr, quantized_output_state_ptr, + quantized_cell_state_ptr, output_state_ptr, cell_state_ptr, + output_ptr_batch); } - // For each batch and cell: compute input_weight * input. - if (!use_cifg) { - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] = - scaling_factors[b] * input_to_input_weights_scale; - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_input_weights_ptr, n_cell, n_input, - quantized_input_ptr_batch, product_scaling_factors, n_batch, - input_gate_scratch, /*result_stride=*/1); - } - - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] = - scaling_factors[b] * input_to_forget_weights_scale; - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_forget_weights_ptr, n_cell, n_input, quantized_input_ptr_batch, - product_scaling_factors, n_batch, forget_gate_scratch, - /*result_stride=*/1); - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] = - scaling_factors[b] * input_to_cell_weights_scale; - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_cell_weights_ptr, n_cell, n_input, quantized_input_ptr_batch, - product_scaling_factors, n_batch, cell_scratch, /*result_stride=*/1); - - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] = - scaling_factors[b] * input_to_output_weights_scale; - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_output_weights_ptr, n_cell, n_input, quantized_input_ptr_batch, - product_scaling_factors, n_batch, output_gate_scratch, - /*result_stride=*/1); - } - - if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * 
n_output)) { - // Save quantization and matmul computation for all zero input. - float unused_min, unused_max; - for (int b = 0; b < n_batch; ++b) { - const int offset = b * n_output; - tensor_utils::SymmetricQuantizeFloats(output_state_ptr + offset, n_output, - quantized_output_state_ptr + offset, - &unused_min, &unused_max, - &scaling_factors[b]); - } - // For each batch and cell: compute recurrent_weight * output_state. - if (!use_cifg) { - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] = - scaling_factors[b] * recurrent_to_input_weights_scale; + void LstmStepWithAuxInput( + const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr, + float input_to_input_weights_scale, + const int8_t* input_to_forget_weights_ptr, + float input_to_forget_weights_scale, + const int8_t* input_to_cell_weights_ptr, + float input_to_cell_weights_scale, + const int8_t* input_to_output_weights_ptr, + float input_to_output_weights_scale, const float* aux_input_ptr_batch, + const int8_t* aux_input_to_input_weights_ptr, + float aux_input_to_input_weights_scale, + const int8_t* aux_input_to_forget_weights_ptr, + float aux_input_to_forget_weights_scale, + const int8_t* aux_input_to_cell_weights_ptr, + float aux_input_to_cell_weights_scale, + const int8_t* aux_input_to_output_weights_ptr, + float aux_input_to_output_weights_scale, + const int8_t* recurrent_to_input_weights_ptr, + float recurrent_to_input_weights_scale, + const int8_t* recurrent_to_forget_weights_ptr, + float recurrent_to_forget_weights_scale, + const int8_t* recurrent_to_cell_weights_ptr, + float recurrent_to_cell_weights_scale, + const int8_t* recurrent_to_output_weights_ptr, + float recurrent_to_output_weights_scale, + const int8_t* cell_to_input_weights_ptr, + float cell_to_input_weights_scale, + const int8_t* cell_to_forget_weights_ptr, + float cell_to_forget_weights_scale, + const int8_t* cell_to_output_weights_ptr, + float cell_to_output_weights_scale, const float* input_gate_bias_ptr, + 
const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr, + float projection_weights_scale, const float* projection_bias_ptr, + const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, + int n_output, float* input_gate_scratch, float* forget_gate_scratch, + float* cell_scratch, float* output_gate_scratch, float* scaling_factors, + float* product_scaling_factors, float* recovered_cell_weights, + int8_t* quantized_input_ptr_batch, + int8_t* quantized_aux_input_ptr_batch, + int8_t* quantized_output_state_ptr, int8_t* quantized_cell_state_ptr, + float* output_state_ptr, float* cell_state_ptr, + float* output_ptr_batch) { + // Since we have already checked that weights are all there or none, we + // can check the existense of only one to the get the condition. + const bool use_cifg = (input_to_input_weights_ptr == nullptr); + const bool use_peephole = (cell_to_output_weights_ptr != nullptr); + // Initialize scratch buffers with bias. + if (!use_cifg) { + tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, + n_batch, input_gate_scratch); + } + tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, + n_batch, forget_gate_scratch); + tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch, + cell_scratch); + tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, + n_batch, output_gate_scratch); + + if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) { + // Save quantization and matmul computation for all zero input. + float unused_min, unused_max; + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_input; + tensor_utils::SymmetricQuantizeFloats( + input_ptr_batch + offset, n_input, + quantized_input_ptr_batch + offset, &unused_min, &unused_max, + &scaling_factors[b]); + } + // For each batch and cell: compute input_weight * input. 
+ if (!use_cifg) { + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * input_to_input_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_input_weights_ptr, n_cell, n_input, + quantized_input_ptr_batch, product_scaling_factors, n_batch, + input_gate_scratch, /*result_stride=*/1); + } + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * input_to_forget_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_forget_weights_ptr, n_cell, n_input, + quantized_input_ptr_batch, product_scaling_factors, n_batch, + forget_gate_scratch, + /*result_stride=*/1); + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * input_to_cell_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_cell_weights_ptr, n_cell, n_input, + quantized_input_ptr_batch, product_scaling_factors, n_batch, + cell_scratch, /*result_stride=*/1); + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * input_to_output_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_output_weights_ptr, n_cell, n_input, + quantized_input_ptr_batch, product_scaling_factors, n_batch, + output_gate_scratch, + /*result_stride=*/1); } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_input_weights_ptr, n_cell, n_output, - quantized_output_state_ptr, product_scaling_factors, n_batch, - input_gate_scratch, /*result_stride=*/1); - } - - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] = - scaling_factors[b] * recurrent_to_forget_weights_scale; - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_forget_weights_ptr, n_cell, n_output, - quantized_output_state_ptr, product_scaling_factors, n_batch, - forget_gate_scratch, /*result_stride=*/1); - - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] 
= - scaling_factors[b] * recurrent_to_cell_weights_scale; - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_cell_weights_ptr, n_cell, n_output, - quantized_output_state_ptr, product_scaling_factors, n_batch, - cell_scratch, /*result_stride=*/1); - - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] = - scaling_factors[b] * recurrent_to_output_weights_scale; - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_output_weights_ptr, n_cell, n_output, - quantized_output_state_ptr, product_scaling_factors, n_batch, - output_gate_scratch, /*result_stride=*/1); - } - - // Save quantization and matmul computation for all zero input. - bool is_cell_state_all_zeros = - tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell); - // For each batch and cell: update input gate. - if (!use_cifg) { - if (use_peephole && !is_cell_state_all_zeros) { - tensor_utils::VectorScalarMultiply(cell_to_input_weights_ptr, n_cell, - cell_to_input_weights_scale, - recovered_cell_weights); - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - recovered_cell_weights, n_cell, cell_state_ptr, n_batch, - input_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, - input_gate_scratch); - } + if (aux_input_ptr_batch != nullptr && + !tensor_utils::IsZeroVector(aux_input_ptr_batch, n_batch * n_input)) { + // Save quantization and matmul computation for all zero input. + float unused_min, unused_max; + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_input; + tensor_utils::SymmetricQuantizeFloats( + aux_input_ptr_batch + offset, n_input, + quantized_aux_input_ptr_batch + offset, &unused_min, &unused_max, + &scaling_factors[b]); + } + // For each batch and cell: compute input_weight * input. 
+ if (!use_cifg) { + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * aux_input_to_input_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_to_input_weights_ptr, n_cell, n_input, + quantized_aux_input_ptr_batch, product_scaling_factors, n_batch, + input_gate_scratch, /*result_stride=*/1); + } + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * aux_input_to_forget_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_to_forget_weights_ptr, n_cell, n_input, + quantized_aux_input_ptr_batch, product_scaling_factors, n_batch, + forget_gate_scratch, /*result_stride=*/1); + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * aux_input_to_cell_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_to_cell_weights_ptr, n_cell, n_input, + quantized_aux_input_ptr_batch, product_scaling_factors, n_batch, + cell_scratch, /*result_stride=*/1); + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * aux_input_to_output_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_to_output_weights_ptr, n_cell, n_input, + quantized_aux_input_ptr_batch, product_scaling_factors, n_batch, + output_gate_scratch, /*result_stride=*/1); + } - // For each batch and cell: update forget gate. 
- if (use_peephole && !is_cell_state_all_zeros) { - tensor_utils::VectorScalarMultiply(cell_to_forget_weights_ptr, n_cell, - cell_to_forget_weights_scale, - recovered_cell_weights); - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - recovered_cell_weights, n_cell, cell_state_ptr, n_batch, - forget_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, - forget_gate_scratch); + if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) { + // Save quantization and matmul computation for all zero input. + float unused_min, unused_max; + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_output; + tensor_utils::SymmetricQuantizeFloats( + output_state_ptr + offset, n_output, + quantized_output_state_ptr + offset, &unused_min, &unused_max, + &scaling_factors[b]); + } + // For each batch and cell: compute recurrent_weight * output_state. + if (!use_cifg) { + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * recurrent_to_input_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_input_weights_ptr, n_cell, n_output, + quantized_output_state_ptr, product_scaling_factors, n_batch, + input_gate_scratch, /*result_stride=*/1); + } + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * recurrent_to_forget_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_forget_weights_ptr, n_cell, n_output, + quantized_output_state_ptr, product_scaling_factors, n_batch, + forget_gate_scratch, /*result_stride=*/1); + + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * recurrent_to_cell_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_cell_weights_ptr, n_cell, n_output, + quantized_output_state_ptr, product_scaling_factors, n_batch, + cell_scratch, /*result_stride=*/1); + + for (int b = 0; b < n_batch; 
++b) { + product_scaling_factors[b] = + scaling_factors[b] * recurrent_to_output_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_output_weights_ptr, n_cell, n_output, + quantized_output_state_ptr, product_scaling_factors, n_batch, + output_gate_scratch, /*result_stride=*/1); + } - // For each batch and cell: update the cell. - tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr, - n_batch * n_cell, cell_state_ptr); - tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, - params->activation, cell_scratch); - if (use_cifg) { - tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, - forget_gate_scratch); - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr); - } else { - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr); - } - if (params->cell_clip > 0.0) { - tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell, - params->cell_clip, cell_state_ptr); - } + // Save quantization and matmul computation for all zero input. + bool is_cell_state_all_zeros = + tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell); + + // For each batch and cell: update input gate. + if (!use_cifg) { + if (use_peephole && !is_cell_state_all_zeros) { + tensor_utils::VectorScalarMultiply(cell_to_input_weights_ptr, n_cell, + cell_to_input_weights_scale, + recovered_cell_weights); + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + recovered_cell_weights, n_cell, cell_state_ptr, n_batch, + input_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, + input_gate_scratch); + } - is_cell_state_all_zeros = - tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell); - // For each batch and cell: update the output gate. 
- if (use_peephole && !is_cell_state_all_zeros) { - tensor_utils::VectorScalarMultiply(cell_to_output_weights_ptr, n_cell, - cell_to_output_weights_scale, - recovered_cell_weights); - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - recovered_cell_weights, n_cell, cell_state_ptr, n_batch, - output_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, - output_gate_scratch); - tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell, - params->activation, cell_scratch); - tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, - n_batch * n_cell, output_gate_scratch); + // For each batch and cell: update forget gate. + if (use_peephole && !is_cell_state_all_zeros) { + tensor_utils::VectorScalarMultiply(cell_to_forget_weights_ptr, n_cell, + cell_to_forget_weights_scale, + recovered_cell_weights); + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + recovered_cell_weights, n_cell, cell_state_ptr, n_batch, + forget_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, + forget_gate_scratch); + + // For each batch and cell: update the cell. + tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, + cell_state_ptr, n_batch * n_cell, + cell_state_ptr); + tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, + params->activation, cell_scratch); + if (use_cifg) { + tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, + forget_gate_scratch); + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, forget_gate_scratch, n_batch * n_cell, + cell_state_ptr); + } else { + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr); + } + if (params->cell_clip > 0.0) { + tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell, + params->cell_clip, cell_state_ptr); + } - // For each batch: update the projection and output_state. 
- const bool use_projection_weight = (projection_weights_ptr != nullptr); - const bool use_projection_bias = (projection_bias_ptr != nullptr); - if (use_projection_weight) { - if (use_projection_bias) { - tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, - n_batch, output_ptr_batch); - } else { - tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output); - } - if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) { - // Save quantization and matmul computation for all zero input. - float unused_min, unused_max; - for (int b = 0; b < n_batch; ++b) { - const int offset = b * n_cell; - tensor_utils::SymmetricQuantizeFloats( - output_gate_scratch + offset, n_cell, - quantized_cell_state_ptr + offset, &unused_min, &unused_max, - &scaling_factors[b]); + is_cell_state_all_zeros = + tensor_utils::IsZeroVector(cell_state_ptr, n_batch * n_cell); + // For each batch and cell: update the output gate. + if (use_peephole && !is_cell_state_all_zeros) { + tensor_utils::VectorScalarMultiply(cell_to_output_weights_ptr, n_cell, + cell_to_output_weights_scale, + recovered_cell_weights); + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + recovered_cell_weights, n_cell, cell_state_ptr, n_batch, + output_gate_scratch); } - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] = - scaling_factors[b] * projection_weights_scale; + tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, + output_gate_scratch); + tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell, + params->activation, cell_scratch); + tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, + n_batch * n_cell, + output_gate_scratch); + + // For each batch: update the projection and output_state. 
+ const bool use_projection_weight = (projection_weights_ptr != nullptr); + const bool use_projection_bias = (projection_bias_ptr != nullptr); + if (use_projection_weight) { + if (use_projection_bias) { + tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, + n_batch, output_ptr_batch); + } else { + tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output); + } + if (!tensor_utils::IsZeroVector(output_gate_scratch, + n_batch * n_cell)) { + // Save quantization and matmul computation for all zero input. + float unused_min, unused_max; + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_cell; + tensor_utils::SymmetricQuantizeFloats( + output_gate_scratch + offset, n_cell, + quantized_cell_state_ptr + offset, &unused_min, &unused_max, + &scaling_factors[b]); + } + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * projection_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + projection_weights_ptr, n_output, n_cell, + quantized_cell_state_ptr, product_scaling_factors, n_batch, + output_ptr_batch, + /*result_stride=*/1); + } + if (params->proj_clip > 0.0) { + tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output, + params->proj_clip, output_ptr_batch); + } + } else { + tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, + output_ptr_batch); } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights_ptr, n_output, n_cell, quantized_cell_state_ptr, - product_scaling_factors, n_batch, output_ptr_batch, - /*result_stride=*/1); - } - if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output, - params->proj_clip, output_ptr_batch); + tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output, + output_state_ptr); } - } else { - tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, - output_ptr_batch); - } - tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output, - output_state_ptr); 
-} } // namespace kernel_utils } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h index 2a11b37a60..1824126828 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h @@ -92,6 +92,31 @@ void LstmStep( float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, float* output_ptr_batch); +// Same as above but includes an auxiliary input with the corresponding weights. +void LstmStepWithAuxInput( + const float* input_ptr_batch, const float* input_to_input_weights_ptr, + const float* input_to_forget_weights_ptr, + const float* input_to_cell_weights_ptr, + const float* input_to_output_weights_ptr, const float* aux_input_ptr_batch, + const float* aux_input_to_input_weights_ptr, + const float* aux_input_to_forget_weights_ptr, + const float* aux_input_to_cell_weights_ptr, + const float* aux_input_to_output_weights_ptr, + const float* recurrent_to_input_weights_ptr, + const float* recurrent_to_forget_weights_ptr, + const float* recurrent_to_cell_weights_ptr, + const float* recurrent_to_output_weights_ptr, + const float* cell_to_input_weights_ptr, + const float* cell_to_forget_weights_ptr, + const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr, + const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const float* projection_weights_ptr, + const float* projection_bias_ptr, const TfLiteLSTMParams* params, + int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr, + float* cell_state_ptr, float* input_gate_scratch, + float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, + float* output_ptr_batch); + // Same as above but with quantized weight matrices. 
In detail: // Input of size 'n_batch * n_input': // input_ptr_batch @@ -175,6 +200,46 @@ void LstmStep( int8_t* quantized_cell_state_ptr, float* output_state_ptr, float* cell_state_ptr, float* output_ptr_batch); +void LstmStepWithAuxInput( + const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr, + float input_to_input_weights_scale, + const int8_t* input_to_forget_weights_ptr, + float input_to_forget_weights_scale, + const int8_t* input_to_cell_weights_ptr, float input_to_cell_weights_scale, + const int8_t* input_to_output_weights_ptr, + float input_to_output_weights_scale, const float* aux_input_ptr_batch, + const int8_t* aux_input_to_input_weights_ptr, + float aux_input_to_input_weights_scale, + const int8_t* aux_input_to_forget_weights_ptr, + float aux_input_to_forget_weights_scale, + const int8_t* aux_input_to_cell_weights_ptr, + float aux_input_to_cell_weights_scale, + const int8_t* aux_input_to_output_weights_ptr, + float aux_input_to_output_weights_scale, + const int8_t* recurrent_to_input_weights_ptr, + float recurrent_to_input_weights_scale, + const int8_t* recurrent_to_forget_weights_ptr, + float recurrent_to_forget_weights_scale, + const int8_t* recurrent_to_cell_weights_ptr, + float recurrent_to_cell_weights_scale, + const int8_t* recurrent_to_output_weights_ptr, + float recurrent_to_output_weights_scale, + const int8_t* cell_to_input_weights_ptr, float cell_to_input_weights_scale, + const int8_t* cell_to_forget_weights_ptr, + float cell_to_forget_weights_scale, + const int8_t* cell_to_output_weights_ptr, + float cell_to_output_weights_scale, const float* input_gate_bias_ptr, + const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr, + float projection_weights_scale, const float* projection_bias_ptr, + const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, + int n_output, float* input_gate_scratch, float* forget_gate_scratch, + float* 
cell_scratch, float* output_gate_scratch, float* scaling_factors, + float* product_scaling_factors, float* recovered_cell_weights, + int8_t* quantized_input_ptr_batch, int8_t* quantized_aux_input_ptr_batch, + int8_t* quantized_output_state_ptr, int8_t* quantized_cell_state_ptr, + float* output_state_ptr, float* cell_state_ptr, float* output_ptr_batch); + } // namespace kernel_utils } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_KERNEL_UTILS_H_ -- GitLab From 915fd68aa46f0f402a6939e547dded4a6b6dc60b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Aug 2018 14:23:35 -0700 Subject: [PATCH 084/380] Use tf.shape to get `max_time` inside _ApplyLengthsToBatch in case the tensor is dynamic shaped. PiperOrigin-RevId: 209829459 --- tensorflow/contrib/recurrent/python/ops/functional_rnn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/recurrent/python/ops/functional_rnn.py b/tensorflow/contrib/recurrent/python/ops/functional_rnn.py index 67a8f59c3c..4d79a4d120 100644 --- a/tensorflow/contrib/recurrent/python/ops/functional_rnn.py +++ b/tensorflow/contrib/recurrent/python/ops/functional_rnn.py @@ -178,7 +178,8 @@ def _ApplyLengthsToBatch(sequence_lengths, tf_output): # TODO(drpng): just use Update so that we don't carry over the gradients? """Sets the output to be zero at the end of the sequence.""" # output is batch major. 
- batch_size, max_time, vector_size = tf_output.shape + shape = array_ops.shape(tf_output) + batch_size, max_time, vector_size = shape[0], shape[1], shape[2] output_time = array_ops.tile(math_ops.range(0, max_time), [batch_size]) output_time = array_ops.reshape(output_time, [batch_size, max_time]) lengths = array_ops.tile( -- GitLab From 091c9809b8dfdb361a476472cb5d10d79b7cd41f Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Wed, 22 Aug 2018 14:27:53 -0700 Subject: [PATCH 085/380] Preallocate the node structure std::vector PiperOrigin-RevId: 209830234 --- tensorflow/contrib/lite/interpreter.cc | 4 ++++ tensorflow/contrib/lite/interpreter.h | 5 +++++ tensorflow/contrib/lite/model.cc | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 362e588725..5ab53f4c1d 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -476,6 +476,10 @@ TfLiteStatus Interpreter::ResetVariableTensorsToZero() { return kTfLiteOk; } +void Interpreter::ReserveNodes(int count) { + nodes_and_registration_.reserve(count); +} + TfLiteStatus Interpreter::AddNodeWithParameters( const std::vector& inputs, const std::vector& outputs, const char* init_data, size_t init_data_size, void* builtin_data, diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 7d69aa2ad3..2b1f1819b9 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -136,6 +136,11 @@ class Interpreter { // interpreter. TfLiteStatus SetVariables(std::vector variables); + // Ensure the internal node storage memory allocates at least `count` + // spots for node. NOTE, this doesn't actually add operators. This is an + // efficiency optimization that is subject to change. + void ReserveNodes(int count); + // Adds a node with the given parameters and returns the index of the new // node in `node_index` (optionally). 
Interpreter will take ownership of // `builtin_data` and destroy it with `free`. Ownership of 'init_data' diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 5f8d5c318a..e10a53b9a2 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -802,6 +802,10 @@ TfLiteStatus InterpreterBuilder::ParseNodes( const flatbuffers::Vector>* operators, Interpreter* interpreter) { TfLiteStatus status = kTfLiteOk; + + // Reduce the number of redundant allocations + interpreter->ReserveNodes(operators->Length()); + for (int i = 0; i < operators->Length(); ++i) { const auto* op = operators->Get(i); int index = op->opcode_index(); -- GitLab From ce127f779dbc6f9d65e17cc3c38f37a06ba666d0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Aug 2018 14:29:51 -0700 Subject: [PATCH 086/380] Fix the interaction between the split handler ops and the grow tree ensemble ops when we have nodes with no examples. PiperOrigin-RevId: 209830550 --- .../kernels/split_handler_ops.cc | 1 + .../boosted_trees/kernels/training_ops.cc | 43 +- .../batch/ordinal_split_handler_test.py | 59 +- .../boosted_trees/proto/split_info.proto | 4 + .../python/kernel_tests/training_ops_test.py | 547 +++++++++++++++++- 5 files changed, 605 insertions(+), 49 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc index 64349cfca3..3a48635319 100644 --- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc @@ -400,6 +400,7 @@ class BuildDenseInequalitySplitsOp : public OpKernel { const int start_index = partition_boundaries[root_idx]; (*output_partition_ids)(root_idx) = partition_ids(start_index); + oblivious_split_info.add_children_parent_id(partition_ids(start_index)); } oblivious_split_info.SerializeToString(&(*output_splits)(0)); } diff --git 
a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc index bb5ae78d9b..ab2853352a 100644 --- a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // ============================================================================= +#include + #include "tensorflow/contrib/boosted_trees/lib/utils/dropout_utils.h" #include "tensorflow/contrib/boosted_trees/proto/learner.pb.h" #include "tensorflow/contrib/boosted_trees/proto/split_info.pb.h" @@ -772,20 +774,32 @@ class GrowTreeEnsembleOp : public OpKernel { // The number of new children. int num_children = 1 << (depth + 1); auto split_info = split->oblivious_split_info; - CHECK(num_children == split_info.children_size()) - << "Wrong number of new children: " << num_children - << " != " << split_info.children_size(); - for (int idx = 0; idx < num_children; idx += 2) { - // Old leaf is at position depth + idx / 2. + CHECK(num_children >= split_info.children_size()) + << "Too many new children, expected <= " << num_children << " and got " + << split_info.children_size(); + std::vector new_leaves; + new_leaves.reserve(num_children); + int next_id = 0; + for (int idx = 0; idx < num_children / 2; idx++) { trees::Leaf old_leaf = - *tree_config->mutable_nodes(depth + idx / 2)->mutable_leaf(); - // Update left leaf. - *split_info.mutable_children(idx) = - *MergeLeafWeights(old_leaf, split_info.mutable_children(idx)); - // Update right leaf. - *split_info.mutable_children(idx + 1) = - *MergeLeafWeights(old_leaf, split_info.mutable_children(idx + 1)); + *tree_config->mutable_nodes(depth + idx)->mutable_leaf(); + // Check if a split was made for this leaf. 
+ if (next_id < split_info.children_parent_id_size() && + depth + idx == split_info.children_parent_id(next_id)) { + // Add left leaf. + new_leaves.push_back(*MergeLeafWeights( + old_leaf, split_info.mutable_children(2 * next_id))); + // Add right leaf. + new_leaves.push_back(*MergeLeafWeights( + old_leaf, split_info.mutable_children(2 * next_id + 1))); + next_id++; + } else { + // If there is no split for this leaf, just duplicate it. + new_leaves.push_back(old_leaf); + new_leaves.push_back(old_leaf); + } } + CHECK(next_id == split_info.children_parent_id_size()); TreeNodeMetadata* split_metadata = split_info.mutable_split_node()->mutable_node_metadata(); split_metadata->set_gain(split->gain); @@ -804,11 +818,10 @@ class GrowTreeEnsembleOp : public OpKernel { if (idx + depth + 1 < nodes_size) { // Update leaves that were already there. *tree_config->mutable_nodes(idx + depth + 1)->mutable_leaf() = - *split_info.mutable_children(idx); + new_leaves[idx]; } else { // Add new leaves. - *tree_config->add_nodes()->mutable_leaf() = - *split_info.mutable_children(idx); + *tree_config->add_nodes()->mutable_leaf() = new_leaves[idx]; } } } diff --git a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py index d9caebb645..31043264a1 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py +++ b/tensorflow/contrib/boosted_trees/lib/learner/batch/ordinal_split_handler_test.py @@ -186,14 +186,14 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): with self.test_session() as sess: # The data looks like the following: # Example | Gradients | Partition | Dense Quantile | - # i0 | (0.2, 0.12) | 0 | 2 | - # i1 | (-0.5, 0.07) | 0 | 2 | - # i2 | (1.2, 0.2) | 0 | 0 | - # i3 | (4.0, 0.13) | 1 | 1 | + # i0 | (0.2, 0.12) | 1 | 2 | + # i1 | (-0.5, 0.07) | 1 | 2 | + # i2 | (1.2, 0.2) | 1 | 0 | + # i3 | (4.0, 0.13) | 2 | 1 | 
dense_column = array_ops.constant([0.62, 0.62, 0.3, 0.52]) gradients = array_ops.constant([0.2, -0.5, 1.2, 4.0]) hessians = array_ops.constant([0.12, 0.07, 0.2, 0.13]) - partition_ids = array_ops.constant([0, 0, 0, 1], dtype=dtypes.int32) + partition_ids = array_ops.constant([1, 1, 1, 2], dtype=dtypes.int32) class_id = -1 gradient_shape = tensor_shape.scalar() @@ -254,7 +254,7 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): self.assertFalse(are_splits_ready) self.assertTrue(are_splits_ready2) - self.assertAllEqual([0, 1], partitions) + self.assertAllEqual([1, 2], partitions) oblivious_split_info = split_info_pb2.ObliviousSplitInfo() oblivious_split_info.ParseFromString(splits[0]) @@ -263,52 +263,57 @@ class DenseSplitHandlerTest(test_util.TensorFlowTestCase): self.assertAllClose(0.3, split_node.threshold, 0.00001) self.assertEqual(0, split_node.feature_column) - # Check the split on partition 0. + # Check the split on partition 1. # -(1.2 - 0.1) / (0.2 + 1) - expected_left_weight_0 = -0.9166666666666666 + expected_left_weight_1 = -0.9166666666666666 - # expected_left_weight_0 * -(1.2 - 0.1) - expected_left_gain_0 = 1.008333333333333 + # expected_left_weight_1 * -(1.2 - 0.1) + expected_left_gain_1 = 1.008333333333333 # (-0.5 + 0.2 + 0.1) / (0.19 + 1) - expected_right_weight_0 = 0.1680672 + expected_right_weight_1 = 0.1680672 - # expected_right_weight_0 * -(-0.5 + 0.2 + 0.1)) - expected_right_gain_0 = 0.033613445378151252 + # expected_right_weight_1 * -(-0.5 + 0.2 + 0.1)) + expected_right_gain_1 = 0.033613445378151252 # (0.2 + -0.5 + 1.2 - 0.1) ** 2 / (0.12 + 0.07 + 0.2 + 1) - expected_bias_gain_0 = 0.46043165467625896 + expected_bias_gain_1 = 0.46043165467625896 left_child = oblivious_split_info.children[0].vector right_child = oblivious_split_info.children[1].vector - self.assertAllClose([expected_left_weight_0], left_child.value, 0.00001) + self.assertAllClose([expected_left_weight_1], left_child.value, 0.00001) - 
self.assertAllClose([expected_right_weight_0], right_child.value, 0.00001) + self.assertAllClose([expected_right_weight_1], right_child.value, 0.00001) - # Check the split on partition 1. - expected_left_weight_1 = 0 - expected_left_gain_1 = 0 + # Check the split on partition 2. + expected_left_weight_2 = 0 + expected_left_gain_2 = 0 # -(4 - 0.1) / (0.13 + 1) - expected_right_weight_1 = -3.4513274336283186 - # expected_right_weight_1 * -(4 - 0.1) - expected_right_gain_1 = 13.460176991150442 + expected_right_weight_2 = -3.4513274336283186 + # expected_right_weight_2 * -(4 - 0.1) + expected_right_gain_2 = 13.460176991150442 # (-4 + 0.1) ** 2 / (0.13 + 1) - expected_bias_gain_1 = 13.460176991150442 + expected_bias_gain_2 = 13.460176991150442 left_child = oblivious_split_info.children[2].vector right_child = oblivious_split_info.children[3].vector - self.assertAllClose([expected_left_weight_1], left_child.value, 0.00001) + self.assertAllClose([expected_left_weight_2], left_child.value, 0.00001) - self.assertAllClose([expected_right_weight_1], right_child.value, 0.00001) + self.assertAllClose([expected_right_weight_2], right_child.value, 0.00001) # The layer gain is the sum of the gains of each partition layer_gain = ( - expected_left_gain_0 + expected_right_gain_0 - expected_bias_gain_0) + ( - expected_left_gain_1 + expected_right_gain_1 - expected_bias_gain_1) + expected_left_gain_1 + expected_right_gain_1 - expected_bias_gain_1) + ( + expected_left_gain_2 + expected_right_gain_2 - expected_bias_gain_2) self.assertAllClose(layer_gain, gains[0], 0.00001) + # We have examples in both partitions, then we get both ids. 
+ self.assertEqual(2, len(oblivious_split_info.children_parent_id)) + self.assertEqual(1, oblivious_split_info.children_parent_id[0]) + self.assertEqual(2, oblivious_split_info.children_parent_id[1]) + def testGenerateFeatureSplitCandidatesLossUsesSumReduction(self): with self.test_session() as sess: # The data looks like the following: diff --git a/tensorflow/contrib/boosted_trees/proto/split_info.proto b/tensorflow/contrib/boosted_trees/proto/split_info.proto index 65448996bf..784977af39 100644 --- a/tensorflow/contrib/boosted_trees/proto/split_info.proto +++ b/tensorflow/contrib/boosted_trees/proto/split_info.proto @@ -21,4 +21,8 @@ message SplitInfo { message ObliviousSplitInfo { tensorflow.boosted_trees.trees.TreeNode split_node = 1; repeated tensorflow.boosted_trees.trees.Leaf children = 2; + // For each child, children_parent_id stores the node_id of its parent when it + // was a leaf. For the idx-th child it corresponds the idx/2-th + // children_parent_id. + repeated int32 children_parent_id = 3; } diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py index 278dc1f756..b3e4c2e5f7 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py @@ -91,7 +91,8 @@ def _gen_dense_split_info(fc, threshold, left_weight, right_weight): return split.SerializeToString() -def _gen_dense_oblivious_split_info(fc, threshold, leave_weights): +def _gen_dense_oblivious_split_info(fc, threshold, leave_weights, + children_parent_id): split_str = """ split_node { oblivious_dense_float_binary_split { @@ -107,6 +108,9 @@ def _gen_dense_oblivious_split_info(fc, threshold, leave_weights): } }""" % ( weight) + for x in children_parent_id: + split_str += """ + children_parent_id: %d""" % (x) split = split_info_pb2.ObliviousSplitInfo() text_format.Merge(split_str, split) 
return split.SerializeToString() @@ -432,14 +436,18 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): handler1_partitions = np.array([0], dtype=np.int32) handler1_gains = np.array([7.62], dtype=np.float32) handler1_split = [ - _gen_dense_oblivious_split_info(0, 0.52, [-4.375, 7.143]) + _gen_dense_oblivious_split_info(0, 0.52, [-4.375, 7.143], [0]) ] handler2_partitions = np.array([0], dtype=np.int32) handler2_gains = np.array([0.63], dtype=np.float32) - handler2_split = [_gen_dense_oblivious_split_info(0, 0.23, [-0.6, 0.24])] + handler2_split = [ + _gen_dense_oblivious_split_info(0, 0.23, [-0.6, 0.24], [0]) + ] handler3_partitions = np.array([0], dtype=np.int32) handler3_gains = np.array([7.62], dtype=np.float32) - handler3_split = [_gen_dense_oblivious_split_info(0, 7, [-4.375, 7.143])] + handler3_split = [ + _gen_dense_oblivious_split_info(0, 7, [-4.375, 7.143], [0]) + ] # Grow tree ensemble. grow_op = training_ops.grow_tree_ensemble( @@ -1675,17 +1683,20 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): handler1_partitions = np.array([0], dtype=np.int32) handler1_gains = np.array([1.4], dtype=np.float32) handler1_split = [ - _gen_dense_oblivious_split_info(0, 0.21, [-6.0, 1.65, 1.0, -0.5]) + _gen_dense_oblivious_split_info(0, 0.21, [-6.0, 1.65, 1.0, -0.5], + [1, 2]) ] handler2_partitions = np.array([0], dtype=np.int32) handler2_gains = np.array([2.7], dtype=np.float32) handler2_split = [ - _gen_dense_oblivious_split_info(0, 0.23, [-0.6, 0.24, 0.3, 0.4]), + _gen_dense_oblivious_split_info(0, 0.23, [-0.6, 0.24, 0.3, 0.4], + [1, 2]) ] handler3_partitions = np.array([0], dtype=np.int32) handler3_gains = np.array([1.7], dtype=np.float32) handler3_split = [ - _gen_dense_oblivious_split_info(0, 3, [-0.75, 1.93, 0.2, -0.1]) + _gen_dense_oblivious_split_info(0, 3, [-0.75, 1.93, 0.2, -0.1], + [1, 2]) ] # Grow tree ensemble layer by layer. 
@@ -1797,6 +1808,528 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): self.assertEqual(stats.attempted_layers, 2) self.assertProtoEquals(expected_result, tree_ensemble_config) + def testGrowEnsembleWithEmptyNodesMiddleCase(self): + """Test case: The middle existing leaves don't have examples.""" + with self.test_session() as session: + tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() + text_format.Merge( + """ + trees { + nodes { + oblivious_dense_float_binary_split { + feature_column: 4 + threshold: 7 + } + node_metadata { + gain: 7.62 + original_oblivious_leaves { + } + } + } + nodes { + oblivious_dense_float_binary_split { + feature_column: 1 + threshold: 0.23 + } + node_metadata { + gain: 2.7 + original_oblivious_leaves { + vector { + value: 7.143 + } + } + original_oblivious_leaves { + vector { + value: -4.375 + } + } + } + } + nodes { + leaf { + vector { + value: 6.543 + } + } + } + nodes { + leaf { + vector { + value: 7.5 + } + } + } + nodes { + leaf { + vector { + value: -4.075 + } + } + } + nodes { + leaf { + vector { + value: -3.975 + } + } + } + } + tree_weights: 0.1 + tree_metadata { + num_tree_weight_updates: 1 + num_layers_grown: 2 + } + growing_metadata { + num_trees_attempted: 1 + num_layers_attempted: 2 + } + """, tree_ensemble_config) + tree_ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, + tree_ensemble_config=tree_ensemble_config.SerializeToString(), + name="tree_ensemble") + resources.initialize_resources(resources.shared_resources()).run() + + # Prepare learner config. + learner_config = _gen_learner_config( + num_classes=2, + l1_reg=0, + l2_reg=0, + tree_complexity=0, + max_depth=6, + min_node_weight=0, + pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE, + growing_mode=learner_pb2.LearnerConfig.LAYER_BY_LAYER) + + # Prepare handler inputs. 
+ handler1_partitions = np.array([0], dtype=np.int32) + handler1_gains = np.array([1.8], dtype=np.float32) + handler1_split = [ + _gen_dense_oblivious_split_info(0, 0.9, [1.0, 2.0, 3.0, 4.0], [2, 5]) + ] + # The tree currently has depth 2, so the ids for the four leaves are in + # the range [2, 6). In this test case we are assuming that our examples + # only fall in leaves 2 and 5. + + # Grow tree ensemble layer by layer. + grow_op = training_ops.grow_tree_ensemble( + tree_ensemble_handle, + stamp_token=0, + next_stamp_token=1, + learning_rate=0.1, + partition_ids=[handler1_partitions], + gains=[handler1_gains], + splits=[handler1_split], + learner_config=learner_config.SerializeToString(), + dropout_seed=123, + center_bias=True, + max_tree_depth=learner_config.constraints.max_tree_depth, + weak_learner_type=learner_pb2.LearnerConfig.OBLIVIOUS_DECISION_TREE) + session.run(grow_op) + + new_stamp, serialized = session.run( + model_ops.tree_ensemble_serialize(tree_ensemble_handle)) + stats = session.run( + training_ops.tree_ensemble_stats(tree_ensemble_handle, stamp_token=1)) + tree_ensemble_config.ParseFromString(serialized) + expected_result = """ + trees { + nodes { + oblivious_dense_float_binary_split { + feature_column: 4 + threshold: 7 + } + node_metadata { + gain: 7.62 + original_oblivious_leaves { + } + } + } + nodes { + oblivious_dense_float_binary_split { + feature_column: 1 + threshold: 0.23 + } + node_metadata { + gain: 2.7 + original_oblivious_leaves { + vector { + value: 7.143 + } + } + original_oblivious_leaves { + vector { + value: -4.375 + } + } + } + } + nodes { + oblivious_dense_float_binary_split { + feature_column: 0 + threshold: 0.9 + } + node_metadata { + gain: 1.8 + original_oblivious_leaves { + vector { + value: 6.543 + } + } + original_oblivious_leaves { + vector { + value: 7.5 + } + } + original_oblivious_leaves { + vector { + value: -4.075 + } + } + original_oblivious_leaves { + vector { + value: -3.975 + } + } + } + } + nodes { + leaf { + 
vector { + value: 7.543 + } + } + } + nodes { + leaf { + vector { + value: 8.543 + } + } + } + nodes { + leaf { + vector { + value: 7.5 + } + } + } + nodes { + leaf { + vector { + value: 7.5 + } + } + } + nodes { + leaf { + vector { + value: -4.075 + } + } + } + nodes { + leaf { + vector { + value: -4.075 + } + } + } + nodes { + leaf { + vector { + value: -0.975 + } + } + } + nodes { + leaf { + vector { + value: 0.025 + } + } + } + } + tree_weights: 0.1 + tree_metadata { + num_tree_weight_updates: 1 + num_layers_grown: 3 + } + growing_metadata { + num_trees_attempted: 1 + num_layers_attempted: 3 + } + """ + self.assertEqual(new_stamp, 1) + self.assertEqual(stats.num_trees, 0) + self.assertEqual(stats.num_layers, 3) + self.assertEqual(stats.active_tree, 1) + self.assertEqual(stats.active_layer, 3) + self.assertEqual(stats.attempted_trees, 1) + self.assertEqual(stats.attempted_layers, 3) + self.assertProtoEquals(expected_result, tree_ensemble_config) + + def testGrowEnsembleWithEmptyNodesBorderCase(self): + """Test case: The first and last existing leaves don't have examples.""" + with self.test_session() as session: + tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() + text_format.Merge( + """ + trees { + nodes { + oblivious_dense_float_binary_split { + feature_column: 4 + threshold: 7 + } + node_metadata { + gain: 7.62 + original_oblivious_leaves { + } + } + } + nodes { + oblivious_dense_float_binary_split { + feature_column: 1 + threshold: 0.23 + } + node_metadata { + gain: 2.7 + original_oblivious_leaves { + vector { + value: 7.143 + } + } + original_oblivious_leaves { + vector { + value: -4.375 + } + } + } + } + nodes { + leaf { + vector { + value: 6.543 + } + } + } + nodes { + leaf { + vector { + value: 7.5 + } + } + } + nodes { + leaf { + vector { + value: -4.075 + } + } + } + nodes { + leaf { + vector { + value: -3.975 + } + } + } + } + tree_weights: 0.1 + tree_metadata { + num_tree_weight_updates: 1 + num_layers_grown: 2 + } + 
growing_metadata { + num_trees_attempted: 1 + num_layers_attempted: 2 + } + """, tree_ensemble_config) + tree_ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, + tree_ensemble_config=tree_ensemble_config.SerializeToString(), + name="tree_ensemble") + resources.initialize_resources(resources.shared_resources()).run() + + # Prepare learner config. + learner_config = _gen_learner_config( + num_classes=2, + l1_reg=0, + l2_reg=0, + tree_complexity=0, + max_depth=6, + min_node_weight=0, + pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE, + growing_mode=learner_pb2.LearnerConfig.LAYER_BY_LAYER) + + # Prepare handler inputs. + handler1_partitions = np.array([0], dtype=np.int32) + handler1_gains = np.array([1.8], dtype=np.float32) + handler1_split = [ + _gen_dense_oblivious_split_info(0, 0.9, [1.0, 2.0, 3.0, 4.0], [3, 4]) + ] + # The tree currently has depth 2, so the ids for the four leaves are in + # the range [2, 6). In this test case we are assuming that our examples + # only fall in leaves 3 and 4. + + # Grow tree ensemble layer by layer. 
+ grow_op = training_ops.grow_tree_ensemble( + tree_ensemble_handle, + stamp_token=0, + next_stamp_token=1, + learning_rate=0.1, + partition_ids=[handler1_partitions], + gains=[handler1_gains], + splits=[handler1_split], + learner_config=learner_config.SerializeToString(), + dropout_seed=123, + center_bias=True, + max_tree_depth=learner_config.constraints.max_tree_depth, + weak_learner_type=learner_pb2.LearnerConfig.OBLIVIOUS_DECISION_TREE) + session.run(grow_op) + + new_stamp, serialized = session.run( + model_ops.tree_ensemble_serialize(tree_ensemble_handle)) + stats = session.run( + training_ops.tree_ensemble_stats(tree_ensemble_handle, stamp_token=1)) + tree_ensemble_config.ParseFromString(serialized) + expected_result = """ + trees { + nodes { + oblivious_dense_float_binary_split { + feature_column: 4 + threshold: 7 + } + node_metadata { + gain: 7.62 + original_oblivious_leaves { + } + } + } + nodes { + oblivious_dense_float_binary_split { + feature_column: 1 + threshold: 0.23 + } + node_metadata { + gain: 2.7 + original_oblivious_leaves { + vector { + value: 7.143 + } + } + original_oblivious_leaves { + vector { + value: -4.375 + } + } + } + } + nodes { + oblivious_dense_float_binary_split { + feature_column: 0 + threshold: 0.9 + } + node_metadata { + gain: 1.8 + original_oblivious_leaves { + vector { + value: 6.543 + } + } + original_oblivious_leaves { + vector { + value: 7.5 + } + } + original_oblivious_leaves { + vector { + value: -4.075 + } + } + original_oblivious_leaves { + vector { + value: -3.975 + } + } + } + } + nodes { + leaf { + vector { + value: 6.543 + } + } + } + nodes { + leaf { + vector { + value: 6.543 + } + } + } + nodes { + leaf { + vector { + value: 8.5 + } + } + } + nodes { + leaf { + vector { + value: 9.5 + } + } + } + nodes { + leaf { + vector { + value: -1.075 + } + } + } + nodes { + leaf { + vector { + value: -0.075 + } + } + } + nodes { + leaf { + vector { + value: -3.975 + } + } + } + nodes { + leaf { + vector { + value: -3.975 + } 
+ } + } + } + tree_weights: 0.1 + tree_metadata { + num_tree_weight_updates: 1 + num_layers_grown: 3 + } + growing_metadata { + num_trees_attempted: 1 + num_layers_attempted: 3 + } + """ + self.assertEqual(new_stamp, 1) + self.assertEqual(stats.num_trees, 0) + self.assertEqual(stats.num_layers, 3) + self.assertEqual(stats.active_tree, 1) + self.assertEqual(stats.active_layer, 3) + self.assertEqual(stats.attempted_trees, 1) + self.assertEqual(stats.attempted_layers, 3) + self.assertProtoEquals(expected_result, tree_ensemble_config) + def testGrowExistingEnsembleTreeFinalizedWithDropout(self): """Test growing an existing ensemble with the last tree finalized.""" with self.cached_session() as session: -- GitLab From 13a4688fc32642ca9ca415ae77d445b95f31600c Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 22 Aug 2018 14:43:45 -0700 Subject: [PATCH 087/380] Convert TensorFlow's flatbuffers dependency to new third party import method PiperOrigin-RevId: 209833197 --- .../contrib/lite/delegates/eager/kernel.cc | 2 +- .../contrib/lite/delegates/eager/test_util.cc | 2 +- .../kernels/ctc_beam_search_decoder.cc | 2 +- .../kernels/ctc_beam_search_decoder_test.cc | 2 +- .../contrib/lite/kernels/audio_spectrogram.cc | 2 +- .../lite/kernels/audio_spectrogram_test.cc | 2 +- .../lite/kernels/detection_postprocess.cc | 2 +- .../kernels/detection_postprocess_test.cc | 2 +- tensorflow/contrib/lite/kernels/mfcc.cc | 2 +- tensorflow/contrib/lite/kernels/mfcc_test.cc | 2 +- tensorflow/contrib/lite/schema/BUILD | 2 +- .../schema/flatbuffer_compatibility_test.cc | 2 +- tensorflow/workspace.bzl | 16 +- third_party/flatbuffers/BUILD | 16 +- .../{flatbuffers.BUILD => BUILD.bazel} | 5 +- .../BUILD.system} | 0 third_party/flatbuffers/build_defs.bzl | 368 +++++++++--------- third_party/flatbuffers/workspace.bzl | 19 + 18 files changed, 231 insertions(+), 217 deletions(-) rename third_party/flatbuffers/{flatbuffers.BUILD => BUILD.bazel} (97%) rename 
third_party/{systemlibs/flatbuffers.BUILD => flatbuffers/BUILD.system} (100%) create mode 100644 third_party/flatbuffers/workspace.bzl diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.cc b/tensorflow/contrib/lite/delegates/eager/kernel.cc index 1082b78725..febf0b85a4 100644 --- a/tensorflow/contrib/lite/delegates/eager/kernel.cc +++ b/tensorflow/contrib/lite/delegates/eager/kernel.cc @@ -14,7 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/contrib/lite/delegates/eager/kernel.h" -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers #include "tensorflow/contrib/lite/builtin_ops.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/context_util.h" diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.cc b/tensorflow/contrib/lite/delegates/eager/test_util.cc index 26d96acc82..b8c9e2652a 100644 --- a/tensorflow/contrib/lite/delegates/eager/test_util.cc +++ b/tensorflow/contrib/lite/delegates/eager/test_util.cc @@ -16,7 +16,7 @@ limitations under the License. #include "tensorflow/contrib/lite/delegates/eager/test_util.h" #include "absl/memory/memory.h" -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers #include "tensorflow/contrib/lite/string.h" namespace tflite { diff --git a/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder.cc b/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder.cc index 834d1ebd66..121997dcb2 100644 --- a/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder.cc +++ b/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ #include -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/experimental/kernels/ctc_beam_search.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" diff --git a/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder_test.cc b/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder_test.cc index 9d1e6a562f..32458305c4 100644 --- a/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder_test.cc +++ b/tensorflow/contrib/lite/experimental/kernels/ctc_beam_search_decoder_test.cc @@ -18,7 +18,7 @@ limitations under the License. #include #include -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/kernels/test_util.h" diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram.cc index 91d8dd3fa7..1170d84553 100644 --- a/tensorflow/contrib/lite/kernels/audio_spectrogram.cc +++ b/tensorflow/contrib/lite/kernels/audio_spectrogram.cc @@ -22,7 +22,7 @@ limitations under the License. #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/kernels/op_macros.h" -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers namespace tflite { namespace ops { diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc index 8d460fdfc6..7346b9fd80 100644 --- a/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc +++ b/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc @@ -18,7 +18,7 @@ limitations under the License. 
#include #include -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/kernels/test_util.h" diff --git a/tensorflow/contrib/lite/kernels/detection_postprocess.cc b/tensorflow/contrib/lite/kernels/detection_postprocess.cc index d7bde0ff79..136697f945 100644 --- a/tensorflow/contrib/lite/kernels/detection_postprocess.cc +++ b/tensorflow/contrib/lite/kernels/detection_postprocess.cc @@ -15,7 +15,7 @@ limitations under the License. #include #include #include -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers #include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" diff --git a/tensorflow/contrib/lite/kernels/detection_postprocess_test.cc b/tensorflow/contrib/lite/kernels/detection_postprocess_test.cc index 4e0f8484a3..94c91a6bd6 100644 --- a/tensorflow/contrib/lite/kernels/detection_postprocess_test.cc +++ b/tensorflow/contrib/lite/kernels/detection_postprocess_test.cc @@ -17,7 +17,7 @@ limitations under the License. #include #include -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/kernels/test_util.h" diff --git a/tensorflow/contrib/lite/kernels/mfcc.cc b/tensorflow/contrib/lite/kernels/mfcc.cc index 3f5bc4d68a..306f676619 100644 --- a/tensorflow/contrib/lite/kernels/mfcc.cc +++ b/tensorflow/contrib/lite/kernels/mfcc.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ #include "tensorflow/contrib/lite/kernels/internal/mfcc.h" -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers #include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/kernels/internal/mfcc_dct.h" diff --git a/tensorflow/contrib/lite/kernels/mfcc_test.cc b/tensorflow/contrib/lite/kernels/mfcc_test.cc index 0291ca8c1c..c9124adcaf 100644 --- a/tensorflow/contrib/lite/kernels/mfcc_test.cc +++ b/tensorflow/contrib/lite/kernels/mfcc_test.cc @@ -18,7 +18,7 @@ limitations under the License. #include #include -#include "flatbuffers/flexbuffers.h" +#include "flatbuffers/flexbuffers.h" // flatbuffers #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/kernels/test_util.h" diff --git a/tensorflow/contrib/lite/schema/BUILD b/tensorflow/contrib/lite/schema/BUILD index b616e449e6..28a7e50003 100644 --- a/tensorflow/contrib/lite/schema/BUILD +++ b/tensorflow/contrib/lite/schema/BUILD @@ -48,7 +48,7 @@ exports_files([ "schema_v3.fbs", ]) -load("//third_party/flatbuffers:build_defs.bzl", "flatbuffer_cc_library") +load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library") # Generic schema for inference on device. flatbuffer_cc_library( diff --git a/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc index cd46a06f7d..11057203a8 100644 --- a/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc +++ b/tensorflow/contrib/lite/schema/flatbuffer_compatibility_test.cc @@ -15,7 +15,7 @@ limitations under the License. 
#include #include -#include "flatbuffers/flatc.h" +#include "flatbuffers/flatc.h" // flatbuffers #include "tensorflow/core/platform/platform.h" #ifdef PLATFORM_GOOGLE diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5d90d0fe64..9d0ce34344 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -19,10 +19,10 @@ load( "//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl", "def_file_filter_configure", ) +load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") def initialize_third_party(): - # Fill in later - pass + flatbuffers() # Sanitize a dependency so that it works correctly from code that includes # TensorFlow as a submodule. @@ -740,18 +740,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"), ) - tf_http_archive( - name = "flatbuffers", - strip_prefix = "flatbuffers-1.9.0", - sha256 = "5ca5491e4260cacae30f1a5786d109230db3f3a6e5a0eb45d0d0608293d247e3", - urls = [ - "https://mirror.bazel.build/github.com/google/flatbuffers/archive/v1.9.0.tar.gz", - "https://github.com/google/flatbuffers/archive/v1.9.0.tar.gz", - ], - build_file = clean_dep("//third_party/flatbuffers:flatbuffers.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:flatbuffers.BUILD"), - ) - native.new_http_archive( name = "double_conversion", urls = [ diff --git a/third_party/flatbuffers/BUILD b/third_party/flatbuffers/BUILD index fbdf19f205..82bab3ffd9 100644 --- a/third_party/flatbuffers/BUILD +++ b/third_party/flatbuffers/BUILD @@ -1,15 +1 @@ -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) # Apache 2.0 - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) +# This empty BUILD file is required to make Bazel treat this directory as a package. 
diff --git a/third_party/flatbuffers/flatbuffers.BUILD b/third_party/flatbuffers/BUILD.bazel similarity index 97% rename from third_party/flatbuffers/flatbuffers.BUILD rename to third_party/flatbuffers/BUILD.bazel index 4a3701e893..9d233a30d6 100644 --- a/third_party/flatbuffers/flatbuffers.BUILD +++ b/third_party/flatbuffers/BUILD.bazel @@ -19,7 +19,10 @@ config_setting( FLATBUFFERS_COPTS = select({ ":windows": [], - "//conditions:default": ["-Wno-implicit-fallthrough", "-fexceptions"], + "//conditions:default": [ + "-Wno-implicit-fallthrough", + "-fexceptions", + ], }) # Public flatc library to compile flatbuffer files at runtime. diff --git a/third_party/systemlibs/flatbuffers.BUILD b/third_party/flatbuffers/BUILD.system similarity index 100% rename from third_party/systemlibs/flatbuffers.BUILD rename to third_party/flatbuffers/BUILD.system diff --git a/third_party/flatbuffers/build_defs.bzl b/third_party/flatbuffers/build_defs.bzl index ae8d7feebe..2f25156668 100644 --- a/third_party/flatbuffers/build_defs.bzl +++ b/third_party/flatbuffers/build_defs.bzl @@ -1,5 +1,4 @@ -# Description: -# BUILD rules for generating flatbuffer files. +"""BUILD rules for generating flatbuffer files.""" flatc_path = "@flatbuffers//:flatc" @@ -8,66 +7,50 @@ DEFAULT_FLATC_ARGS = [ "--gen-object-api", ] -def flatbuffer_library_public(name, - srcs, - outs, - language_flag, - out_prefix="", - includes=[], - include_paths=[], - flatc_args=DEFAULT_FLATC_ARGS, - reflection_name="", - reflection_visiblity=None, - output_to_bindir=False): - '''Generates code files for reading/writing the given flatbuffers in the requested language using the public compiler. - - Args: - name: Rule name. - srcs: Source .fbs files. Sent in order to the compiler. - outs: Output files from flatc. - language_flag: Target language flag. One of [-c, -j, -js]. - out_prefix: Prepend this path to the front of all generated files except on - single source targets. Usually is a directory name. 
- includes: Optional, list of filegroups of schemas that the srcs depend on. - include_paths: Optional, list of paths the includes files can be found in. - flatc_args: Optional, list of additional arguments to pass to flatc. - reflection_name: Optional, if set this will generate the flatbuffer - reflection binaries for the schemas. - reflection_visiblity: The visibility of the generated reflection Fileset. - output_to_bindir: Passed to genrule for output to bin directory. - Outs: - filegroup(name): all generated source files. - Fileset([reflection_name]): (Optional) all generated reflection binaries. - ''' - include_paths_cmd = ["-I %s" % (s) for s in include_paths] - # '$(@D)' when given a single source target will give the appropriate - # directory. Appending 'out_prefix' is only necessary when given a build - # target with multiple sources. - output_directory = ( - ("-o $(@D)/%s" % (out_prefix)) if len(srcs) > 1 else ("-o $(@D)")) - genrule_cmd = " ".join([ - "for f in $(SRCS); do", - "$(location %s)" % (flatc_path), - " ".join(flatc_args), - " ".join(include_paths_cmd), - language_flag, - output_directory, - "$$f;", - "done", - ]) - native.genrule( - name=name, - srcs=srcs, - outs=outs, - output_to_bindir=output_to_bindir, - tools=includes + [flatc_path,], - cmd=genrule_cmd, - message="Generating flatbuffer files for %s:" % (name),) - if reflection_name: - reflection_genrule_cmd = " ".join([ +def flatbuffer_library_public( + name, + srcs, + outs, + language_flag, + out_prefix = "", + includes = [], + include_paths = [], + flatc_args = DEFAULT_FLATC_ARGS, + reflection_name = "", + reflection_visiblity = None, + output_to_bindir = False): + """Generates code files for reading/writing the given flatbuffers in the requested language using the public compiler. + + Outs: + filegroup(name): all generated source files. + Fileset([reflection_name]): (Optional) all generated reflection binaries. + + Args: + name: Rule name. + srcs: Source .fbs files. 
Sent in order to the compiler. + outs: Output files from flatc. + language_flag: Target language flag. One of [-c, -j, -js]. + out_prefix: Prepend this path to the front of all generated files except on + single source targets. Usually is a directory name. + includes: Optional, list of filegroups of schemas that the srcs depend on. + include_paths: Optional, list of paths the includes files can be found in. + flatc_args: Optional, list of additional arguments to pass to flatc. + reflection_name: Optional, if set this will generate the flatbuffer + reflection binaries for the schemas. + reflection_visiblity: The visibility of the generated reflection Fileset. + output_to_bindir: Passed to genrule for output to bin directory. + """ + include_paths_cmd = ["-I %s" % (s) for s in include_paths] + + # '$(@D)' when given a single source target will give the appropriate + # directory. Appending 'out_prefix' is only necessary when given a build + # target with multiple sources. + output_directory = ( + ("-o $(@D)/%s" % (out_prefix)) if len(srcs) > 1 else ("-o $(@D)") + ) + genrule_cmd = " ".join([ "for f in $(SRCS); do", "$(location %s)" % (flatc_path), - "-b --schema", " ".join(flatc_args), " ".join(include_paths_cmd), language_flag, @@ -75,122 +58,157 @@ def flatbuffer_library_public(name, "$$f;", "done", ]) - reflection_outs = [ - (out_prefix + "%s.bfbs") % (s.replace(".fbs", "").split("/")[-1]) for s in srcs - ] native.genrule( - name= "%s_srcs" % reflection_name, - srcs=srcs, - outs=reflection_outs, - output_to_bindir=output_to_bindir, - tools=includes + [flatc_path,], - cmd=reflection_genrule_cmd, - message="Generating flatbuffer reflection binary for %s:" % (name),) - native.Fileset( - name=reflection_name, - out="%s_out" % reflection_name, - entries=[ - native.FilesetEntry(files=reflection_outs), - ], - visibility=reflection_visiblity + name = name, + srcs = srcs, + outs = outs, + output_to_bindir = output_to_bindir, + tools = includes + [flatc_path], + cmd = 
genrule_cmd, + message = "Generating flatbuffer files for %s:" % (name), ) + if reflection_name: + reflection_genrule_cmd = " ".join([ + "for f in $(SRCS); do", + "$(location %s)" % (flatc_path), + "-b --schema", + " ".join(flatc_args), + " ".join(include_paths_cmd), + language_flag, + output_directory, + "$$f;", + "done", + ]) + reflection_outs = [ + (out_prefix + "%s.bfbs") % (s.replace(".fbs", "").split("/")[-1]) + for s in srcs + ] + native.genrule( + name = "%s_srcs" % reflection_name, + srcs = srcs, + outs = reflection_outs, + output_to_bindir = output_to_bindir, + tools = includes + [flatc_path], + cmd = reflection_genrule_cmd, + message = "Generating flatbuffer reflection binary for %s:" % (name), + ) + native.Fileset( + name = reflection_name, + out = "%s_out" % reflection_name, + entries = [ + native.FilesetEntry(files = reflection_outs), + ], + visibility = reflection_visiblity, + ) + +def flatbuffer_cc_library( + name, + srcs, + srcs_filegroup_name = "", + out_prefix = "", + includes = [], + include_paths = [], + flatc_args = DEFAULT_FLATC_ARGS, + visibility = None, + srcs_filegroup_visibility = None, + gen_reflections = False): + '''A cc_library with the generated reader/writers for the given flatbuffer definitions. + + Outs: + filegroup([name]_srcs): all generated .h files. + filegroup(srcs_filegroup_name if specified, or [name]_includes if not): + Other flatbuffer_cc_library's can pass this in for their `includes` + parameter, if they depend on the schemas in this library. + Fileset([name]_reflection): (Optional) all generated reflection binaries. + cc_library([name]): library with sources and flatbuffers deps. + + Remarks: + ** Because the genrule used to call flatc does not have any trivial way of + computing the output list of files transitively generated by includes and + --gen-includes (the default) being defined for flatc, the --gen-includes + flag will not work as expected. 
The way around this is to add a dependency + to the flatbuffer_cc_library defined alongside the flatc included Fileset. + For example you might define: + + flatbuffer_cc_library( + name = "my_fbs", + srcs = [ "schemas/foo.fbs" ], + includes = [ "//third_party/bazz:bazz_fbs_includes" ], + ) + In which foo.fbs includes a few files from the Fileset defined at + //third_party/bazz:bazz_fbs_includes. When compiling the library that + includes foo_generated.h, and therefore has my_fbs as a dependency, it + will fail to find any of the bazz *_generated.h files unless you also + add bazz's flatbuffer_cc_library to your own dependency list, e.g.: -def flatbuffer_cc_library(name, srcs, srcs_filegroup_name="", - out_prefix="", includes=[], include_paths=[], - flatc_args=DEFAULT_FLATC_ARGS, - visibility=None, srcs_filegroup_visibility=None, - gen_reflections=False): - '''A cc_library with the generated reader/writers for the given flatbuffer definitions. - - Args: - name: Rule name. - srcs: Source .fbs files. Sent in order to the compiler. - srcs_filegroup_name: Name of the output filegroup that holds srcs. Pass this - filegroup into the `includes` parameter of any other - flatbuffer_cc_library that depends on this one's schemas. - out_prefix: Prepend this path to the front of all generated files. Usually - is a directory name. - includes: Optional, list of filegroups of schemas that the srcs depend on. - ** SEE REMARKS BELOW ** - include_paths: Optional, list of paths the includes files can be found in. - flatc_args: Optional list of additional arguments to pass to flatc - (e.g. --gen-mutable). - visibility: The visibility of the generated cc_library. By default, use the - default visibility of the project. - srcs_filegroup_visibility: The visibility of the generated srcs filegroup. - By default, use the value of the visibility parameter above. - gen_reflections: Optional, if true this will generate the flatbuffer - reflection binaries for the schemas. 
- Outs: - filegroup([name]_srcs): all generated .h files. - filegroup(srcs_filegroup_name if specified, or [name]_includes if not): - Other flatbuffer_cc_library's can pass this in for their `includes` - parameter, if they depend on the schemas in this library. - Fileset([name]_reflection): (Optional) all generated reflection binaries. - cc_library([name]): library with sources and flatbuffers deps. - - Remarks: - ** Because the genrule used to call flatc does not have any trivial way of - computing the output list of files transitively generated by includes and - --gen-includes (the default) being defined for flatc, the --gen-includes - flag will not work as expected. The way around this is to add a dependency - to the flatbuffer_cc_library defined alongside the flatc included Fileset. - For example you might define: - - flatbuffer_cc_library( - name = "my_fbs", - srcs = [ "schemas/foo.fbs" ], - includes = [ "//third_party/bazz:bazz_fbs_includes" ], - ) - - In which foo.fbs includes a few files from the Fileset defined at - //third_party/bazz:bazz_fbs_includes. When compiling the library that - includes foo_generated.h, and therefore has my_fbs as a dependency, it - will fail to find any of the bazz *_generated.h files unless you also - add bazz's flatbuffer_cc_library to your own dependency list, e.g.: - - cc_library( - name = "my_lib", - deps = [ - ":my_fbs", - "//third_party/bazz:bazz_fbs" - ], - ) - - Happy dependent Flatbuffering! 
- ''' - output_headers = [ - (out_prefix + "%s_generated.h") % (s.replace(".fbs", "").split("/")[-1]) for s in srcs - ] - reflection_name = "%s_reflection" % name if gen_reflections else "" - - flatbuffer_library_public(name="%s_srcs" % (name), - srcs=srcs, - outs=output_headers, - language_flag="-c", - out_prefix=out_prefix, - includes=includes, - include_paths=include_paths, - flatc_args=flatc_args, - reflection_name=reflection_name, - reflection_visiblity=visibility,) - native.cc_library(name=name, - hdrs=output_headers, - srcs=output_headers, - features=[ - "-parse_headers", - ], - deps=[ - "@flatbuffers//:runtime_cc", - ], - includes=["."], - linkstatic=1, - visibility=visibility) - - # A filegroup for the `srcs`. That is, all the schema files for this - # Flatbuffer set. - native.filegroup( - name = srcs_filegroup_name if srcs_filegroup_name else "%s_includes" % (name), - srcs = srcs, - visibility=srcs_filegroup_visibility if srcs_filegroup_visibility != None else visibility) + cc_library( + name = "my_lib", + deps = [ + ":my_fbs", + "//third_party/bazz:bazz_fbs" + ], + ) + + Happy dependent Flatbuffering! + + Args: + name: Rule name. + srcs: Source .fbs files. Sent in order to the compiler. + srcs_filegroup_name: Name of the output filegroup that holds srcs. Pass this + filegroup into the `includes` parameter of any other + flatbuffer_cc_library that depends on this one's schemas. + out_prefix: Prepend this path to the front of all generated files. Usually + is a directory name. + includes: Optional, list of filegroups of schemas that the srcs depend on. + ** SEE REMARKS BELOW ** + include_paths: Optional, list of paths the includes files can be found in. + flatc_args: Optional list of additional arguments to pass to flatc + (e.g. --gen-mutable). + visibility: The visibility of the generated cc_library. By default, use the + default visibility of the project. + srcs_filegroup_visibility: The visibility of the generated srcs filegroup. 
+ By default, use the value of the visibility parameter above. + gen_reflections: Optional, if true this will generate the flatbuffer + reflection binaries for the schemas. + ''' + output_headers = [ + (out_prefix + "%s_generated.h") % (s.replace(".fbs", "").split("/")[-1]) + for s in srcs + ] + reflection_name = "%s_reflection" % name if gen_reflections else "" + + flatbuffer_library_public( + name = "%s_srcs" % (name), + srcs = srcs, + outs = output_headers, + language_flag = "-c", + out_prefix = out_prefix, + includes = includes, + include_paths = include_paths, + flatc_args = flatc_args, + reflection_name = reflection_name, + reflection_visiblity = visibility, + ) + native.cc_library( + name = name, + hdrs = output_headers, + srcs = output_headers, + features = [ + "-parse_headers", + ], + deps = [ + "@flatbuffers//:runtime_cc", + ], + includes = ["."], + linkstatic = 1, + visibility = visibility, + ) + + # A filegroup for the `srcs`. That is, all the schema files for this + # Flatbuffer set. 
+ native.filegroup( + name = srcs_filegroup_name if srcs_filegroup_name else "%s_includes" % (name), + srcs = srcs, + visibility = srcs_filegroup_visibility if srcs_filegroup_visibility != None else visibility, + ) diff --git a/third_party/flatbuffers/workspace.bzl b/third_party/flatbuffers/workspace.bzl new file mode 100644 index 0000000000..3aeef96a72 --- /dev/null +++ b/third_party/flatbuffers/workspace.bzl @@ -0,0 +1,19 @@ +"""Loads the Flatbuffers library, used by TF Lite.""" + +load("//third_party:repo.bzl", "third_party_http_archive") + +def repo(): + third_party_http_archive( + name = "flatbuffers", + strip_prefix = "flatbuffers-1.9.0", + sha256 = "5ca5491e4260cacae30f1a5786d109230db3f3a6e5a0eb45d0d0608293d247e3", + urls = [ + "https://mirror.bazel.build/github.com/google/flatbuffers/archive/v1.9.0.tar.gz", + "https://github.com/google/flatbuffers/archive/v1.9.0.tar.gz", + ], + build_file = "//third_party/flatbuffers:BUILD.bazel", + system_build_file = "//third_party/flatbuffers:BUILD.system", + link_files = { + "//third_party/flatbuffers:build_defs.bzl": "build_defs.bzl", + }, + ) -- GitLab From b56e4377687b95014fa8dadc8e99192484fa79a0 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 22 Aug 2018 14:58:59 -0700 Subject: [PATCH 088/380] Disable random_forest_test in asan. 
PiperOrigin-RevId: 209835865 --- tensorflow/contrib/tensor_forest/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 22d6e499d2..cf55fec488 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -538,6 +538,7 @@ py_test( srcs = ["client/random_forest_test.py"], srcs_version = "PY2AND3", tags = [ + "noasan", "nomac", # b/63258195 "notsan", ], -- GitLab From fb3bde1994d4ed7d6cb928326e8e2a1777930e5e Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Wed, 22 Aug 2018 15:00:02 -0700 Subject: [PATCH 089/380] [tf.data] Implements `dataset` transformation `parse_example_dataset(..)` which will replace dataset.map(parsing_ops.parse_example(..)). PiperOrigin-RevId: 209836033 --- .../contrib/data/python/kernel_tests/BUILD | 22 + .../python/kernel_tests/parsing_ops_test.py | 850 ++++++++++++++++++ tensorflow/contrib/data/python/ops/BUILD | 20 +- .../contrib/data/python/ops/parsing_ops.py | 152 ++++ tensorflow/contrib/data/python/ops/readers.py | 8 +- .../api_def_ParseExampleDataset.pbtxt | 69 ++ .../api_def_ParseExampleDataset.pbtxt | 4 + tensorflow/core/kernels/data/BUILD | 10 + .../kernels/data/parse_example_dataset_op.cc | 347 +++++++ tensorflow/core/ops/dataset_ops.cc | 17 + tensorflow/python/ops/parsing_ops.py | 178 ++-- 11 files changed, 1602 insertions(+), 75 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py create mode 100644 tensorflow/contrib/data/python/ops/parsing_ops.py create mode 100644 tensorflow/core/api_def/base_api/api_def_ParseExampleDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ParseExampleDataset.pbtxt create mode 100644 tensorflow/core/kernels/data/parse_example_dataset_op.cc diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 220f1adf7f..a673c4b6f9 100644 --- 
a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -237,6 +237,27 @@ py_test( ], ) +py_test( + name = "parsing_ops_test", + size = "small", + srcs = ["parsing_ops_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:parsing_ops", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:platform", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + "//third_party/py/numpy", + ], +) + cuda_py_test( name = "prefetching_ops_test", size = "small", @@ -323,6 +344,7 @@ py_test( "//tensorflow/python:parsing_ops", "//tensorflow/python:string_ops", "//tensorflow/python/data/ops:readers", + "//tensorflow/python/data/util:nest", "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py new file mode 100644 index 0000000000..f6c4a984b8 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/parsing_ops_test.py @@ -0,0 +1,850 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for tensorflow.ops.parsing_ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy + +import numpy as np + +from tensorflow.contrib.data.python.ops import parsing_ops as contrib_parsing_ops +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import nest +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import parsing_ops +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging + +# Helpers for creating Example objects +example = example_pb2.Example +feature = feature_pb2.Feature +features = lambda d: feature_pb2.Features(feature=d) +bytes_feature = lambda v: feature(bytes_list=feature_pb2.BytesList(value=v)) +int64_feature = lambda v: feature(int64_list=feature_pb2.Int64List(value=v)) +float_feature = lambda v: feature(float_list=feature_pb2.FloatList(value=v)) +# Helpers for creating SequenceExample objects +feature_list = lambda l: feature_pb2.FeatureList(feature=l) +feature_lists = lambda d: feature_pb2.FeatureLists(feature_list=d) +sequence_example = example_pb2.SequenceExample + + +def _compare_output_to_expected(tester, dict_tensors, expected_tensors, + flat_output): + tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys())) + + i = 0 # Index into the flattened output of session.run() + for k, v in sorted(dict_tensors.items()): + # TODO(shivaniagrawal): flat_output is same as v. 
+ expected_v = expected_tensors[k] + tf_logging.info("Comparing key: %s", k) + print("i", i, "flat_output", flat_output[i], "expected_v", expected_v) + if sparse_tensor.is_sparse(v): + # Three outputs for SparseTensor : indices, values, shape. + tester.assertEqual([k, len(expected_v)], [k, 3]) + print("i", i, "flat_output", flat_output[i].indices, "expected_v", + expected_v[0]) + tester.assertAllEqual(expected_v[0], flat_output[i].indices) + tester.assertAllEqual(expected_v[1], flat_output[i].values) + tester.assertAllEqual(expected_v[2], flat_output[i].dense_shape) + else: + # One output for standard Tensor. + tester.assertAllEqual(expected_v, flat_output[i]) + i += 1 + + +class ParseExampleTest(test.TestCase): + + def _test(self, + input_tensor, + feature_val, + expected_values=None, + expected_err=None): + + with self.test_session() as sess: + if expected_err: + with self.assertRaisesWithPredicateMatch(expected_err[0], + expected_err[1]): + dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply( + contrib_parsing_ops.parse_example_dataset(feature_val)) + get_next = dataset.make_one_shot_iterator().get_next() + sess.run(get_next) + return + else: + # Returns dict w/ Tensors and SparseTensors. + # Check values. + dataset = dataset_ops.Dataset.from_tensors(input_tensor).apply( + contrib_parsing_ops.parse_example_dataset(feature_val)) + get_next = dataset.make_one_shot_iterator().get_next() + result = sess.run(get_next) + flattened = nest.flatten(result) + print("result", result, "expected_values", expected_values) + _compare_output_to_expected(self, result, expected_values, flattened) + + # Check shapes; if serialized is a Tensor we need its size to + # properly check. 
+ batch_size = ( + input_tensor.eval().size if isinstance(input_tensor, ops.Tensor) else + np.asarray(input_tensor).size) + for k, f in feature_val.items(): + print("output_shapes as list ", + tuple(dataset.output_shapes[k].as_list())) + if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None: + self.assertEqual(dataset.output_shapes[k].as_list()[0], batch_size) + elif isinstance(f, parsing_ops.VarLenFeature): + self.assertEqual(dataset.output_shapes[k].as_list()[1], None) + + def testEmptySerializedWithAllDefaults(self): + sparse_name = "st_a" + a_name = "a" + b_name = "b" + c_name = "c:has_a_tricky_name" + a_default = [0, 42, 0] + b_default = np.random.rand(3, 3).astype(bytes) + c_default = np.random.rand(2).astype(np.float32) + + expected_st_a = ( # indices, values, shape + np.empty( + (0, 2), dtype=np.int64), # indices + np.empty( + (0,), dtype=np.int64), # sp_a is DT_INT64 + np.array( + [2, 0], dtype=np.int64)) # batch == 2, max_elems = 0 + + expected_output = { + sparse_name: expected_st_a, + a_name: np.array(2 * [[a_default]]), + b_name: np.array(2 * [b_default]), + c_name: np.array(2 * [c_default]), + } + + self._test( + ops.convert_to_tensor(["", ""]), { + sparse_name: + parsing_ops.VarLenFeature(dtypes.int64), + a_name: + parsing_ops.FixedLenFeature( + (1, 3), dtypes.int64, default_value=a_default), + b_name: + parsing_ops.FixedLenFeature( + (3, 3), dtypes.string, default_value=b_default), + c_name: + parsing_ops.FixedLenFeature( + (2,), dtypes.float32, default_value=c_default), + }, + expected_values=expected_output) + + def testEmptySerializedWithoutDefaultsShouldFail(self): + input_features = { + "st_a": + parsing_ops.VarLenFeature(dtypes.int64), + "a": + parsing_ops.FixedLenFeature( + (1, 3), dtypes.int64, default_value=[0, 42, 0]), + "b": + parsing_ops.FixedLenFeature( + (3, 3), + dtypes.string, + default_value=np.random.rand(3, 3).astype(bytes)), + # Feature "c" is missing a default, this gap will cause failure. 
+ "c": + parsing_ops.FixedLenFeature( + (2,), dtype=dtypes.float32), + } + + # Edge case where the key is there but the feature value is empty + original = example(features=features({"c": feature()})) + self._test( + [original.SerializeToString()], + input_features, + expected_err=(errors_impl.InvalidArgumentError, + "Feature: c \\(data type: float\\) is required")) + + # Standard case of missing key and value. + self._test( + ["", ""], + input_features, + expected_err=(errors_impl.InvalidArgumentError, + "Feature: c \\(data type: float\\) is required")) + + def testDenseNotMatchingShapeShouldFail(self): + original = [ + example(features=features({ + "a": float_feature([1, 1, 3]), + })), example(features=features({ + "a": float_feature([-1, -1]), + })) + ] + + serialized = [m.SerializeToString() for m in original] + + self._test( + ops.convert_to_tensor(serialized), + {"a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)}, + expected_err=(errors_impl.InvalidArgumentError, + "Key: a, Index: 1. 
Number of float values")) + + def testDenseDefaultNoShapeShouldFail(self): + original = [example(features=features({"a": float_feature([1, 1, 3]),})),] + + serialized = [m.SerializeToString() for m in original] + + self._test( + ops.convert_to_tensor(serialized), + {"a": parsing_ops.FixedLenFeature(None, dtypes.float32)}, + expected_err=(ValueError, "Missing shape for feature a")) + + def testSerializedContainingSparse(self): + original = [ + example(features=features({ + "st_c": float_feature([3, 4]) + })), + example(features=features({ + "st_c": float_feature([]), # empty float list + })), + example(features=features({ + "st_d": feature(), # feature with nothing in it + })), + example(features=features({ + "st_c": float_feature([1, 2, -1]), + "st_d": bytes_feature([b"hi"]) + })) + ] + + serialized = [m.SerializeToString() for m in original] + + expected_st_c = ( # indices, values, shape + np.array( + [[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64), np.array( + [3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32), np.array( + [4, 3], dtype=np.int64)) # batch == 2, max_elems = 3 + + expected_st_d = ( # indices, values, shape + np.array( + [[3, 0]], dtype=np.int64), np.array( + ["hi"], dtype=bytes), np.array( + [4, 1], dtype=np.int64)) # batch == 2, max_elems = 1 + + expected_output = { + "st_c": expected_st_c, + "st_d": expected_st_d, + } + + self._test( + ops.convert_to_tensor(serialized), { + "st_c": parsing_ops.VarLenFeature(dtypes.float32), + "st_d": parsing_ops.VarLenFeature(dtypes.string) + }, + expected_values=expected_output) + + def testSerializedContainingSparseFeature(self): + original = [ + example(features=features({ + "val": float_feature([3, 4]), + "idx": int64_feature([5, 10]) + })), + example(features=features({ + "val": float_feature([]), # empty float list + "idx": int64_feature([]) + })), + example(features=features({ + "val": feature(), # feature with nothing in it + # missing idx feature + })), + example(features=features({ + "val": 
float_feature([1, 2, -1]), + "idx": + int64_feature([0, 9, 3]) # unsorted + })) + ] + + serialized = [m.SerializeToString() for m in original] + + expected_sp = ( # indices, values, shape + np.array( + [[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64), + np.array( + [3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32), np.array( + [4, 13], dtype=np.int64)) # batch == 4, max_elems = 13 + + expected_output = {"sp": expected_sp,} + + self._test( + ops.convert_to_tensor(serialized), + {"sp": parsing_ops.SparseFeature(["idx"], "val", dtypes.float32, [13])}, + expected_values=expected_output) + + def testSerializedContainingSparseFeatureReuse(self): + original = [ + example(features=features({ + "val1": float_feature([3, 4]), + "val2": float_feature([5, 6]), + "idx": int64_feature([5, 10]) + })), + example(features=features({ + "val1": float_feature([]), # empty float list + "idx": int64_feature([]) + })), + ] + + serialized = [m.SerializeToString() for m in original] + + expected_sp1 = ( # indices, values, shape + np.array( + [[0, 5], [0, 10]], dtype=np.int64), np.array( + [3.0, 4.0], dtype=np.float32), np.array( + [2, 13], dtype=np.int64)) # batch == 2, max_elems = 13 + + expected_sp2 = ( # indices, values, shape + np.array( + [[0, 5], [0, 10]], dtype=np.int64), np.array( + [5.0, 6.0], dtype=np.float32), np.array( + [2, 7], dtype=np.int64)) # batch == 2, max_elems = 13 + + expected_output = { + "sp1": expected_sp1, + "sp2": expected_sp2, + } + + self._test( + ops.convert_to_tensor(serialized), { + "sp1": + parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13), + "sp2": + parsing_ops.SparseFeature( + "idx", "val2", dtypes.float32, size=7, already_sorted=True) + }, + expected_values=expected_output) + + def testSerializedContaining3DSparseFeature(self): + original = [ + example(features=features({ + "val": float_feature([3, 4]), + "idx0": int64_feature([5, 10]), + "idx1": int64_feature([0, 2]), + })), + example(features=features({ + "val": float_feature([]), # 
empty float list + "idx0": int64_feature([]), + "idx1": int64_feature([]), + })), + example(features=features({ + "val": feature(), # feature with nothing in it + # missing idx feature + })), + example(features=features({ + "val": float_feature([1, 2, -1]), + "idx0": int64_feature([0, 9, 3]), # unsorted + "idx1": int64_feature([1, 0, 2]), + })) + ] + + serialized = [m.SerializeToString() for m in original] + + expected_sp = ( + # indices + np.array( + [[0, 5, 0], [0, 10, 2], [3, 0, 1], [3, 3, 2], [3, 9, 0]], + dtype=np.int64), + # values + np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32), + # shape batch == 4, max_elems = 13 + np.array([4, 13, 3], dtype=np.int64)) + + expected_output = {"sp": expected_sp,} + + self._test( + ops.convert_to_tensor(serialized), { + "sp": + parsing_ops.SparseFeature(["idx0", "idx1"], "val", + dtypes.float32, [13, 3]) + }, + expected_values=expected_output) + + def testSerializedContainingDense(self): + aname = "a" + bname = "b*has+a:tricky_name" + original = [ + example(features=features({ + aname: float_feature([1, 1]), + bname: bytes_feature([b"b0_str"]), + })), example(features=features({ + aname: float_feature([-1, -1]), + bname: bytes_feature([b""]), + })) + ] + + serialized = [m.SerializeToString() for m in original] + + expected_output = { + aname: + np.array( + [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1), + bname: + np.array( + ["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1), + } + + # No defaults, values required + self._test( + ops.convert_to_tensor(serialized), { + aname: + parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32), + bname: + parsing_ops.FixedLenFeature((1, 1, 1, 1), dtype=dtypes.string), + }, + expected_values=expected_output) + + # This test is identical as the previous one except + # for the creation of 'serialized'. 
+ def testSerializedContainingDenseWithConcat(self): + aname = "a" + bname = "b*has+a:tricky_name" + # TODO(lew): Feature appearing twice should be an error in future. + original = [ + (example(features=features({ + aname: float_feature([10, 10]), + })), example(features=features({ + aname: float_feature([1, 1]), + bname: bytes_feature([b"b0_str"]), + }))), + ( + example(features=features({ + bname: bytes_feature([b"b100"]), + })), + example(features=features({ + aname: float_feature([-1, -1]), + bname: bytes_feature([b"b1"]), + })),), + ] + + serialized = [ + m.SerializeToString() + n.SerializeToString() for (m, n) in original + ] + + expected_output = { + aname: + np.array( + [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1), + bname: + np.array( + ["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1), + } + + # No defaults, values required + self._test( + ops.convert_to_tensor(serialized), { + aname: + parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32), + bname: + parsing_ops.FixedLenFeature((1, 1, 1, 1), dtype=dtypes.string), + }, + expected_values=expected_output) + + def testSerializedContainingDenseScalar(self): + original = [ + example(features=features({ + "a": float_feature([1]), + })), example(features=features({})) + ] + + serialized = [m.SerializeToString() for m in original] + + expected_output = { + "a": + np.array( + [[1], [-1]], dtype=np.float32) # 2x1 (column vector) + } + + self._test( + ops.convert_to_tensor(serialized), { + "a": + parsing_ops.FixedLenFeature( + (1,), dtype=dtypes.float32, default_value=-1), + }, + expected_values=expected_output) + + def testSerializedContainingDenseWithDefaults(self): + original = [ + example(features=features({ + "a": float_feature([1, 1]), + })), + example(features=features({ + "b": bytes_feature([b"b1"]), + })), + example(features=features({ + "b": feature() + })), + ] + + serialized = [m.SerializeToString() for m in original] + + expected_output = { + "a": + np.array( + [[1, 1], [3, -3], 
[3, -3]], dtype=np.float32).reshape(3, 1, 2, + 1), + "b": + np.array( + ["tmp_str", "b1", "tmp_str"], dtype=bytes).reshape(3, 1, 1, 1, + 1), + } + + self._test( + ops.convert_to_tensor(serialized), { + "a": + parsing_ops.FixedLenFeature( + (1, 2, 1), dtype=dtypes.float32, default_value=[3.0, -3.0]), + "b": + parsing_ops.FixedLenFeature( + (1, 1, 1, 1), dtype=dtypes.string, default_value="tmp_str"), + }, + expected_values=expected_output) + + def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self): + expected_st_a = ( # indices, values, shape + np.empty( + (0, 2), dtype=np.int64), # indices + np.empty( + (0,), dtype=np.int64), # sp_a is DT_INT64 + np.array( + [2, 0], dtype=np.int64)) # batch == 2, max_elems = 0 + expected_sp = ( # indices, values, shape + np.array( + [[0, 0], [0, 3], [1, 7]], dtype=np.int64), np.array( + ["a", "b", "c"], dtype="|S"), np.array( + [2, 13], dtype=np.int64)) # batch == 4, max_elems = 13 + + original = [ + example(features=features({ + "c": float_feature([3, 4]), + "val": bytes_feature([b"a", b"b"]), + "idx": int64_feature([0, 3]) + })), example(features=features({ + "c": float_feature([1, 2]), + "val": bytes_feature([b"c"]), + "idx": int64_feature([7]) + })) + ] + + serialized = [m.SerializeToString() for m in original] + + a_default = [1, 2, 3] + b_default = np.random.rand(3, 3).astype(bytes) + expected_output = { + "st_a": expected_st_a, + "sp": expected_sp, + "a": np.array(2 * [[a_default]]), + "b": np.array(2 * [b_default]), + "c": np.array( + [[3, 4], [1, 2]], dtype=np.float32), + } + + self._test( + ops.convert_to_tensor(serialized), + { + "st_a": + parsing_ops.VarLenFeature(dtypes.int64), + "sp": + parsing_ops.SparseFeature("idx", "val", dtypes.string, 13), + "a": + parsing_ops.FixedLenFeature( + (1, 3), dtypes.int64, default_value=a_default), + "b": + parsing_ops.FixedLenFeature( + (3, 3), dtypes.string, default_value=b_default), + # Feature "c" must be provided, since it has no default_value. 
+ "c": + parsing_ops.FixedLenFeature((2,), dtypes.float32), + }, + expected_values=expected_output) + + def testSerializedContainingSparseAndSparseFeatureWithReuse(self): + expected_idx = ( # indices, values, shape + np.array( + [[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64), + np.array([0, 3, 7, 1]), np.array( + [2, 2], dtype=np.int64)) # batch == 4, max_elems = 2 + + expected_sp = ( # indices, values, shape + np.array( + [[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64), np.array( + ["a", "b", "d", "c"], dtype="|S"), np.array( + [2, 13], dtype=np.int64)) # batch == 4, max_elems = 13 + + original = [ + example(features=features({ + "val": bytes_feature([b"a", b"b"]), + "idx": int64_feature([0, 3]) + })), example(features=features({ + "val": bytes_feature([b"c", b"d"]), + "idx": int64_feature([7, 1]) + })) + ] + + serialized = [m.SerializeToString() for m in original] + + expected_output = { + "idx": expected_idx, + "sp": expected_sp, + } + + self._test( + ops.convert_to_tensor(serialized), { + "idx": + parsing_ops.VarLenFeature(dtypes.int64), + "sp": + parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]), + }, + expected_values=expected_output) + + def _testSerializedContainingVarLenDenseLargerBatch(self, batch_size): + # During parsing, data read from the serialized proto is stored in buffers. + # For small batch sizes, a buffer will contain one minibatch entry. + # For larger batch sizes, a buffer may contain several minibatch + # entries. This test identified a bug where the code that copied + # data out of the buffers and into the output tensors assumed each + # buffer only contained one minibatch entry. The bug has since been fixed. + truth_int = [i for i in range(batch_size)] + truth_str = [[("foo%d" % i).encode(), ("bar%d" % i).encode()] + for i in range(batch_size)] + + expected_str = copy.deepcopy(truth_str) + + # Delete some intermediate entries + for i in range(batch_size): + col = 1 + if np.random.rand() < 0.25: + # w.p. 
25%, drop out the second entry + expected_str[i][col] = b"default" + col -= 1 + truth_str[i].pop() + if np.random.rand() < 0.25: + # w.p. 25%, drop out the second entry (possibly again) + expected_str[i][col] = b"default" + truth_str[i].pop() + + expected_output = { + # Batch size batch_size, 1 time step. + "a": np.array(truth_int, dtype=np.int64).reshape(batch_size, 1), + # Batch size batch_size, 2 time steps. + "b": np.array(expected_str, dtype="|S").reshape(batch_size, 2), + } + + original = [ + example(features=features( + {"a": int64_feature([truth_int[i]]), + "b": bytes_feature(truth_str[i])})) + for i in range(batch_size) + ] + + serialized = [m.SerializeToString() for m in original] + + self._test( + ops.convert_to_tensor(serialized, dtype=dtypes.string), { + "a": + parsing_ops.FixedLenSequenceFeature( + shape=(), + dtype=dtypes.int64, + allow_missing=True, + default_value=-1), + "b": + parsing_ops.FixedLenSequenceFeature( + shape=[], + dtype=dtypes.string, + allow_missing=True, + default_value="default"), + }, + expected_values=expected_output) + + def testSerializedContainingVarLenDenseLargerBatch(self): + np.random.seed(3456) + for batch_size in (1, 10, 20, 100, 256): + self._testSerializedContainingVarLenDenseLargerBatch(batch_size) + + def testSerializedContainingVarLenDense(self): + aname = "a" + bname = "b" + cname = "c" + dname = "d" + original = [ + example(features=features({ + cname: int64_feature([2]), + })), + example(features=features({ + aname: float_feature([1, 1]), + bname: bytes_feature([b"b0_str", b"b1_str"]), + })), + example(features=features({ + aname: float_feature([-1, -1, 2, 2]), + bname: bytes_feature([b"b1"]), + })), + example(features=features({ + aname: float_feature([]), + cname: int64_feature([3]), + })), + ] + + serialized = [m.SerializeToString() for m in original] + + expected_output = { + aname: + np.array( + [ + [0, 0, 0, 0], + [1, 1, 0, 0], + [-1, -1, 2, 2], + [0, 0, 0, 0], + ], + dtype=np.float32).reshape(4, 2, 2, 1), + 
bname: + np.array( + [["", ""], ["b0_str", "b1_str"], ["b1", ""], ["", ""]], + dtype=bytes).reshape(4, 2, 1, 1, 1), + cname: + np.array([2, 0, 0, 3], dtype=np.int64).reshape(4, 1), + dname: + np.empty(shape=(4, 0), dtype=bytes), + } + + self._test( + ops.convert_to_tensor(serialized), { + aname: + parsing_ops.FixedLenSequenceFeature( + (2, 1), dtype=dtypes.float32, allow_missing=True), + bname: + parsing_ops.FixedLenSequenceFeature( + (1, 1, 1), dtype=dtypes.string, allow_missing=True), + cname: + parsing_ops.FixedLenSequenceFeature( + shape=[], dtype=dtypes.int64, allow_missing=True), + dname: + parsing_ops.FixedLenSequenceFeature( + shape=[], dtype=dtypes.string, allow_missing=True), + }, + expected_values=expected_output) + + # Test with padding values. + expected_output_custom_padding = dict(expected_output) + expected_output_custom_padding[aname] = np.array( + [ + [-2, -2, -2, -2], + [1, 1, -2, -2], + [-1, -1, 2, 2], + [-2, -2, -2, -2], + ], + dtype=np.float32).reshape(4, 2, 2, 1) + + self._test( + ops.convert_to_tensor(serialized), { + aname: + parsing_ops.FixedLenSequenceFeature( + (2, 1), + dtype=dtypes.float32, + allow_missing=True, + default_value=-2.0), + bname: + parsing_ops.FixedLenSequenceFeature( + (1, 1, 1), dtype=dtypes.string, allow_missing=True), + cname: + parsing_ops.FixedLenSequenceFeature( + shape=[], dtype=dtypes.int64, allow_missing=True), + dname: + parsing_ops.FixedLenSequenceFeature( + shape=[], dtype=dtypes.string, allow_missing=True), + }, expected_output_custom_padding) + + # Change number of required values so the inputs are not a + # multiple of this size. + self._test( + ops.convert_to_tensor(serialized), { + aname: + parsing_ops.FixedLenSequenceFeature( + (2, 1), dtype=dtypes.float32, allow_missing=True), + bname: + parsing_ops.FixedLenSequenceFeature( + (2, 1, 1), dtype=dtypes.string, allow_missing=True), + }, + expected_err=( + errors_impl.OpError, "Key: b, Index: 2. 
" + "Number of bytes values is not a multiple of stride length.")) + + self._test( + ops.convert_to_tensor(serialized), { + aname: + parsing_ops.FixedLenSequenceFeature( + (2, 1), + dtype=dtypes.float32, + allow_missing=True, + default_value=[]), + bname: + parsing_ops.FixedLenSequenceFeature( + (2, 1, 1), dtype=dtypes.string, allow_missing=True), + }, + expected_err=(ValueError, + "Cannot reshape a tensor with 0 elements to shape")) + + self._test( + ops.convert_to_tensor(serialized), { + aname: + parsing_ops.FixedLenFeature((None, 2, 1), dtype=dtypes.float32), + bname: + parsing_ops.FixedLenSequenceFeature( + (2, 1, 1), dtype=dtypes.string, allow_missing=True), + }, + expected_err=(ValueError, + "First dimension of shape for feature a unknown. " + "Consider using FixedLenSequenceFeature.")) + + self._test( + ops.convert_to_tensor(serialized), { + cname: + parsing_ops.FixedLenFeature( + (1, None), dtype=dtypes.int64, default_value=[[1]]), + }, + expected_err=(ValueError, + "All dimensions of shape for feature c need to be known " + r"but received \(1, None\).")) + + self._test( + ops.convert_to_tensor(serialized), { + aname: + parsing_ops.FixedLenSequenceFeature( + (2, 1), dtype=dtypes.float32, allow_missing=True), + bname: + parsing_ops.FixedLenSequenceFeature( + (1, 1, 1), dtype=dtypes.string, allow_missing=True), + cname: + parsing_ops.FixedLenSequenceFeature( + shape=[], dtype=dtypes.int64, allow_missing=False), + dname: + parsing_ops.FixedLenSequenceFeature( + shape=[], dtype=dtypes.string, allow_missing=True), + }, + expected_err=(ValueError, + "Unsupported: FixedLenSequenceFeature requires " + "allow_missing to be True.")) + + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index ad9378dfb9..d540ba470a 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -80,6 +80,7 @@ py_library( ":batching", ":gen_dataset_ops", 
":interleave_ops", + ":parsing_ops", ":shuffle_ops", ":stats_ops", "//tensorflow/python:constant_op", @@ -87,10 +88,7 @@ py_library( "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:parsing_ops", "//tensorflow/python:platform", - "//tensorflow/python:string_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", @@ -210,6 +208,22 @@ py_library( ], ) +py_library( + name = "parsing_ops", + srcs = ["parsing_ops.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", + ], +) + py_library( name = "map_defun", srcs = ["map_defun.py"], diff --git a/tensorflow/contrib/data/python/ops/parsing_ops.py b/tensorflow/contrib/data/python/ops/parsing_ops.py new file mode 100644 index 0000000000..f868653554 --- /dev/null +++ b/tensorflow/contrib/data/python/ops/parsing_ops.py @@ -0,0 +1,152 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Experimental `dataset` API for parsing example.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import nest +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import parsing_ops + + +class _ParseExampleDataset(dataset_ops.Dataset): + """A `Dataset` that parses `example` dataset into a `dict` dataset.""" + + def __init__(self, input_dataset, features, num_parallel_calls): + super(_ParseExampleDataset, self).__init__() + self._input_dataset = input_dataset + if not all(types == dtypes.string + for types in nest.flatten(input_dataset.output_types)): + raise TypeError("Input dataset should be a dataset of vectors of strings") + self._num_parallel_calls = num_parallel_calls + # pylint: disable=protected-access + self._features = parsing_ops._prepend_none_dimension(features) + # sparse_keys and dense_keys come back sorted here. + (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults, + dense_shapes) = parsing_ops._features_to_raw_params( + self._features, [ + parsing_ops.VarLenFeature, parsing_ops.SparseFeature, + parsing_ops.FixedLenFeature, parsing_ops.FixedLenSequenceFeature + ]) + # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature. 
+ (_, dense_defaults_vec, sparse_keys, sparse_types, dense_keys, dense_shapes, + dense_shape_as_shape) = parsing_ops._process_raw_parameters( + None, dense_defaults, sparse_keys, sparse_types, dense_keys, + dense_types, dense_shapes) + # pylint: enable=protected-access + self._sparse_keys = sparse_keys + self._sparse_types = sparse_types + self._dense_keys = dense_keys + self._dense_defaults = dense_defaults_vec + self._dense_shapes = dense_shapes + self._dense_types = dense_types + dense_output_shapes = [ + self._input_dataset.output_shapes.concatenate(shape) + for shape in dense_shape_as_shape + ] + sparse_output_shapes = [ + self._input_dataset.output_shapes.concatenate([None]) + for _ in range(len(sparse_keys)) + ] + + self._output_shapes = dict( + zip(self._dense_keys + self._sparse_keys, + dense_output_shapes + sparse_output_shapes)) + self._output_types = dict( + zip(self._dense_keys + self._sparse_keys, + self._dense_types + self._sparse_types)) + self._output_classes = dict( + zip(self._dense_keys + self._sparse_keys, + [ops.Tensor for _ in range(len(self._dense_defaults))] + + [sparse_tensor.SparseTensor for _ in range(len(self._sparse_keys)) + ])) + + def _as_variant_tensor(self): + return gen_dataset_ops.parse_example_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._num_parallel_calls, + self._dense_defaults, + self._sparse_keys, + self._dense_keys, + self._sparse_types, + self._dense_shapes, + **dataset_ops.flat_structure(self)) + + @property + def output_shapes(self): + return self._output_shapes + + @property + def output_types(self): + return self._output_types + + @property + def output_classes(self): + return self._output_classes + + +# TODO(b/38416882): Properly export in the `tf.contrib.data` API when stable +# or make private / remove. +# TODO(b/111553342): add arguments names and example names as well. 
+def parse_example_dataset(features, num_parallel_calls=1): + """A transformation that parses `Example` protos into a `dict` of tensors. + + Parses a number of serialized `Example` protos given in `serialized`. We refer + to `serialized` as a batch with `batch_size` many entries of individual + `Example` protos. + + This op parses serialized examples into a dictionary mapping keys to `Tensor` + and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`, + `SparseFeature`, and `FixedLenFeature` objects. Each `VarLenFeature` + and `SparseFeature` is mapped to a `SparseTensor`, and each + `FixedLenFeature` is mapped to a `Tensor`. See `tf.parse_example` for more + details about feature dictionaries. + + Args: + features: A `dict` mapping feature keys to `FixedLenFeature`, + `VarLenFeature`, and `SparseFeature` values. + num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`, + representing the number of parsing processes to call in parallel. + + Returns: + A dataset transformation function, which can be passed to + `tf.data.Dataset.apply`. + + Raises: + ValueError: if features argument is None. + """ + if features is None: + raise ValueError("Missing: features was %s." 
% features) + + def _apply_fn(dataset): + """Function from `Dataset` to `Dataset` that applies the transformation.""" + out_dataset = _ParseExampleDataset(dataset, features, num_parallel_calls) + if any([ + isinstance(feature, parsing_ops.SparseFeature) + for _, feature in features.items() + ]): + # pylint: disable=protected-access + # pylint: disable=g-long-lambda + out_dataset = out_dataset.map( + lambda x: parsing_ops._construct_sparse_tensors_for_sparse_features( + features, x), num_parallel_calls=num_parallel_calls) + return out_dataset + + return _apply_fn diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 3882d4bfdb..151f12b082 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -25,6 +25,7 @@ import numpy as np from tensorflow.contrib.data.python.ops import batching from tensorflow.contrib.data.python.ops import gen_dataset_ops as contrib_gen_dataset_ops from tensorflow.contrib.data.python.ops import interleave_ops +from tensorflow.contrib.data.python.ops import parsing_ops from tensorflow.contrib.data.python.ops import shuffle_ops from tensorflow.contrib.data.python.ops import stats_ops from tensorflow.python.data.ops import dataset_ops @@ -37,7 +38,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import file_io from tensorflow.python.ops import gen_dataset_ops -from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.util import deprecation @@ -788,9 +788,9 @@ def make_batched_features_dataset(file_pattern, batch_size, drop_remainder=drop_final_batch or num_epochs is None) # Parse `Example` tensors to a dictionary of `Feature` tensors. 
- dataset = dataset.map( - lambda x: parsing_ops.parse_example(x, features), - num_parallel_calls=parser_num_threads) + dataset = dataset.apply( + parsing_ops.parse_example_dataset( + features, num_parallel_calls=parser_num_threads)) # TODO(rachelim): Add an optional label_name argument for extracting the label # from the features dictionary, to comply with the type expected by the diff --git a/tensorflow/core/api_def/base_api/api_def_ParseExampleDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ParseExampleDataset.pbtxt new file mode 100644 index 0000000000..3de2f18fc2 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ParseExampleDataset.pbtxt @@ -0,0 +1,69 @@ +op { + graph_op_name: "ParseExampleDataset" + in_arg { + name: "dense_defaults" + description: <